Import the data

library(skimr)

# Import the data from merged_LTC_odhf_quality.csv; empty fields become NA.
# stringsAsFactors = TRUE is explicit because the summaries and the
# factor-to-character conversion further down expect text columns to be read
# as factors, which is only the read.csv() default before R 4.0.0.
# NOTE(review): the summary reports a maximum longitude of 79.63 while the
# other quantiles are negative - possibly a sign error in the source file;
# confirm before using the coordinates.
df <- read.csv(
  "merged_LTC_odhf_quality.csv",
  na.strings = "",
  header = TRUE,
  stringsAsFactors = TRUE
)
skim(df)
Data summary
Name df
Number of rows 615
Number of columns 38
_______________________
Column type frequency:
factor 16
numeric 22
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
name 0 1.00 FALSE 615 AFT: 1, ALB: 1, ALE: 1, ALG: 1
cleaned_name 0 1.00 FALSE 615 aft: 1, alb: 1, ale: 1, alg: 1
address 0 1.00 FALSE 614 180: 2, 1 B: 1, 1 M: 1, 1 N: 1
LHIN 0 1.00 FALSE 14 Ham: 85, Sou: 76, Cen: 68, Cha: 57
home_type 0 1.00 FALSE 3 For: 351, Non: 164, Mun: 100
city 0 1.00 FALSE 263 Tor: 37, Sca: 20, Ham: 16, Ott: 16
postal_code 0 1.00 FALSE 594 K0: 4, K0: 2, K0: 2, K0: 2
short_stay 0 1.00 FALSE 2 No: 412, Yes: 203
residents_council 0 1.00 FALSE 2 Yes: 606, No: 9
family_council 0 1.00 FALSE 2 Yes: 510, No: 105
accreditation 0 1.00 FALSE 2 Yes: 511, No: 104
confirmed_resident_cases 486 0.21 FALSE 38 <5: 48, 0: 36, 25: 3, 28: 3
resident_deaths 323 0.47 FALSE 36 0: 195, <5: 25, 11: 9, 18: 6
confirmed_staff_cases 486 0.21 FALSE 30 <5: 53, 0: 24, 10: 4, 22: 4
status 323 0.47 FALSE 2 Ina: 163, Act: 129
CSDname 1 1.00 FALSE 208 Tor: 85, Ott: 27, Ham: 26, Lon: 15

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
total_inspections 0 1 37.68 21.95 9.00 22.00 32.00 47.00 172.00 ▇▃▁▁▁
X5y_inspections 0 1 19.82 11.95 5.00 12.00 17.00 24.00 86.00 ▇▃▁▁▁
X2y_inspections 0 1 9.20 5.59 1.00 5.00 8.00 12.00 44.00 ▇▅▁▁▁
total_complaints 0 1 14.70 11.64 0.00 7.00 12.00 20.00 94.00 ▇▂▁▁▁
X5y_complaints 0 1 6.60 5.93 0.00 2.00 5.00 9.00 46.00 ▇▂▁▁▁
X2y_complaints 0 1 3.20 2.86 0.00 1.00 3.00 5.00 20.00 ▇▂▁▁▁
total_critical 0 1 12.61 9.17 0.00 6.50 11.00 16.00 62.00 ▇▃▁▁▁
X5y_critical 0 1 6.54 4.82 0.00 3.00 5.00 8.00 40.00 ▇▂▁▁▁
X2y_critical 0 1 3.98 2.54 0.00 2.00 3.00 5.00 26.00 ▇▂▁▁▁
total_withOrders 0 1 6.88 6.17 0.00 3.00 5.00 9.00 43.00 ▇▂▁▁▁
X5y_withOrders 0 1 4.41 4.35 0.00 2.00 3.00 6.00 34.00 ▇▂▁▁▁
X2y_withOrders 0 1 1.85 2.20 0.00 0.00 1.00 3.00 16.00 ▇▁▁▁▁
antipsychotic_percent 0 1 18.47 7.74 0.00 13.45 18.00 23.00 62.80 ▃▇▂▁▁
depression_percent 0 1 23.40 10.09 1.50 15.25 23.40 31.40 48.40 ▃▇▇▇▂
falls_percent 0 1 16.49 4.83 1.80 13.35 16.40 19.70 33.20 ▁▅▇▂▁
pressure_ulcers_percent 0 1 2.74 1.53 0.00 1.60 2.60 3.60 11.00 ▇▇▂▁▁
pain_percent 0 1 5.67 5.77 0.00 1.95 3.90 7.50 48.00 ▇▂▁▁▁
restraints_percent 0 1 4.21 5.75 0.00 0.40 2.20 5.80 42.00 ▇▁▁▁▁
number_beds 0 1 127.27 73.83 12.00 69.00 120.00 160.00 543.00 ▇▇▁▁▁
CSDuid 2 1 3527707.51 14148.84 3501005.00 3520005.00 3525005.00 3539005.00 3560042.00 ▃▇▅▅▂
latitude 1 1 44.20 1.47 42.03 43.32 43.76 44.55 51.01 ▇▅▁▁▁
longitude 1 1 -79.62 6.99 -94.57 -80.75 -79.63 -79.17 79.63 ▇▁▁▁▁

Add derived variables

# Add a column with outbreak status: a missing `status` is recorded as "no",
# any non-missing `status` as "yes"
df$outbreak <- ifelse(is.na(df$status), "no", "yes")

# Add a column of LHIN health regions; start by listing the distinct LHINs
unique(df$LHIN)
##  [1] Hamilton Niagara Haldimand Brant (Hnhb)
##  [2] North West                             
##  [3] Erie St. Clair                         
##  [4] Champlain                              
##  [5] Toronto Central                        
##  [6] Waterloo Wellington                    
##  [7] North Simcoe Muskoka                   
##  [8] Central East                           
##  [9] North East                             
## [10] South East                             
## [11] South West                             
## [12] Central                                
## [13] Mississauga Halton                     
## [14] Central West                           
## 14 Levels: Central Central East Central West Champlain ... Waterloo Wellington
# Collapse the LHINs into five broader regions. Each entry lists the LHIN
# labels that belong to the region named on the left.
lhins_by_region <- list(
  North = c("North West", "North East"),
  Toronto = "Toronto Central",
  East = c("Champlain", "Central East", "South East"),
  West = c(
    "Erie St. Clair",
    "Hamilton Niagara Haldimand Brant (Hnhb)",
    "South West",
    "Waterloo Wellington"
  ),
  Central = c(
    "Mississauga Halton",
    "Central West",
    "Central",
    "North Simcoe Muskoka"
  )
)
# Rows whose LHIN matches no entry keep a missing region
for (region_name in names(lhins_by_region)) {
  df$region[df$LHIN %in% lhins_by_region[[region_name]]] <- region_name
}

# The same derived counts are built for each of the three column prefixes
count_prefixes <- c("total", "X5y", "X2y")

# Sum of complaints and critical
for (prefix in count_prefixes) {
  df[[paste0(prefix, "_cc")]] <-
    df[[paste0(prefix, "_complaints")]] +
    df[[paste0(prefix, "_critical")]]
}

# Sum of complaints, critical and withOrders
for (prefix in count_prefixes) {
  df[[paste0(prefix, "_ccw")]] <-
    df[[paste0(prefix, "_complaints")]] +
    df[[paste0(prefix, "_critical")]] +
    df[[paste0(prefix, "_withOrders")]]
}

# Number of non-complaints inspections (all inspections minus complaints)
for (prefix in count_prefixes) {
  df[[paste0(prefix, "_noncomplaints")]] <-
    df[[paste0(prefix, "_inspections")]] -
    df[[paste0(prefix, "_complaints")]]
}

Change variable types

# Store the two derived grouping columns as factors
for (factor_col in c("outbreak", "region")) {
  df[[factor_col]] <- as.factor(df[[factor_col]])
}

# Convert the name/address-like columns to plain character strings
listofcol <- list(
  "name", "cleaned_name", "address", "city", "postal_code", "CSDname"
)
text_cols <- unlist(listofcol)
df[text_cols] <- lapply(df[text_cols], as.character)

skim(df)
Data summary
Name df
Number of rows 615
Number of columns 49
_______________________
Column type frequency:
character 6
factor 12
numeric 31
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
name 0 1 7 84 0 615 0
cleaned_name 0 1 7 84 0 615 0
address 0 1 8 61 0 614 0
city 0 1 3 19 0 263 0
postal_code 0 1 7 7 0 594 615
CSDname 1 1 3 39 0 208 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
LHIN 0 1.00 FALSE 14 Ham: 85, Sou: 76, Cen: 68, Cha: 57
home_type 0 1.00 FALSE 3 For: 351, Non: 164, Mun: 100
short_stay 0 1.00 FALSE 2 No: 412, Yes: 203
residents_council 0 1.00 FALSE 2 Yes: 606, No: 9
family_council 0 1.00 FALSE 2 Yes: 510, No: 105
accreditation 0 1.00 FALSE 2 Yes: 511, No: 104
confirmed_resident_cases 486 0.21 FALSE 38 <5: 48, 0: 36, 25: 3, 28: 3
resident_deaths 323 0.47 FALSE 36 0: 195, <5: 25, 11: 9, 18: 6
confirmed_staff_cases 486 0.21 FALSE 30 <5: 53, 0: 24, 10: 4, 22: 4
status 323 0.47 FALSE 2 Ina: 163, Act: 129
outbreak 0 1.00 FALSE 2 no: 323, yes: 292
region 0 1.00 FALSE 5 Wes: 233, Eas: 161, Cen: 122, Nor: 63

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
total_inspections 0 1 37.68 21.95 9.00 22.00 32.00 47.00 172.00 ▇▃▁▁▁
X5y_inspections 0 1 19.82 11.95 5.00 12.00 17.00 24.00 86.00 ▇▃▁▁▁
X2y_inspections 0 1 9.20 5.59 1.00 5.00 8.00 12.00 44.00 ▇▅▁▁▁
total_complaints 0 1 14.70 11.64 0.00 7.00 12.00 20.00 94.00 ▇▂▁▁▁
X5y_complaints 0 1 6.60 5.93 0.00 2.00 5.00 9.00 46.00 ▇▂▁▁▁
X2y_complaints 0 1 3.20 2.86 0.00 1.00 3.00 5.00 20.00 ▇▂▁▁▁
total_critical 0 1 12.61 9.17 0.00 6.50 11.00 16.00 62.00 ▇▃▁▁▁
X5y_critical 0 1 6.54 4.82 0.00 3.00 5.00 8.00 40.00 ▇▂▁▁▁
X2y_critical 0 1 3.98 2.54 0.00 2.00 3.00 5.00 26.00 ▇▂▁▁▁
total_withOrders 0 1 6.88 6.17 0.00 3.00 5.00 9.00 43.00 ▇▂▁▁▁
X5y_withOrders 0 1 4.41 4.35 0.00 2.00 3.00 6.00 34.00 ▇▂▁▁▁
X2y_withOrders 0 1 1.85 2.20 0.00 0.00 1.00 3.00 16.00 ▇▁▁▁▁
antipsychotic_percent 0 1 18.47 7.74 0.00 13.45 18.00 23.00 62.80 ▃▇▂▁▁
depression_percent 0 1 23.40 10.09 1.50 15.25 23.40 31.40 48.40 ▃▇▇▇▂
falls_percent 0 1 16.49 4.83 1.80 13.35 16.40 19.70 33.20 ▁▅▇▂▁
pressure_ulcers_percent 0 1 2.74 1.53 0.00 1.60 2.60 3.60 11.00 ▇▇▂▁▁
pain_percent 0 1 5.67 5.77 0.00 1.95 3.90 7.50 48.00 ▇▂▁▁▁
restraints_percent 0 1 4.21 5.75 0.00 0.40 2.20 5.80 42.00 ▇▁▁▁▁
number_beds 0 1 127.27 73.83 12.00 69.00 120.00 160.00 543.00 ▇▇▁▁▁
CSDuid 2 1 3527707.51 14148.84 3501005.00 3520005.00 3525005.00 3539005.00 3560042.00 ▃▇▅▅▂
latitude 1 1 44.20 1.47 42.03 43.32 43.76 44.55 51.01 ▇▅▁▁▁
longitude 1 1 -79.62 6.99 -94.57 -80.75 -79.63 -79.17 79.63 ▇▁▁▁▁
total_cc 0 1 27.32 19.14 0.00 14.00 23.00 35.50 153.00 ▇▃▁▁▁
X5y_cc 0 1 13.14 9.91 0.00 7.00 10.00 17.00 63.00 ▇▅▁▁▁
X2y_cc 0 1 7.17 4.66 0.00 4.00 6.00 9.00 34.00 ▇▆▁▁▁
total_ccw 0 1 34.20 22.68 2.00 18.00 28.00 44.00 165.00 ▇▃▁▁▁
X5y_ccw 0 1 17.54 12.63 1.00 9.00 15.00 22.00 93.00 ▇▃▁▁▁
X2y_ccw 0 1 9.02 6.00 0.00 5.00 8.00 12.00 45.00 ▇▃▁▁▁
total_noncomplaints 0 1 22.98 12.09 5.00 15.00 20.00 28.00 90.00 ▇▃▁▁▁
X5y_noncomplaints 0 1 13.23 7.11 4.00 9.00 11.00 15.00 68.00 ▇▂▁▁▁
X2y_noncomplaints 0 1 6.00 3.64 0.00 4.00 5.00 8.00 40.00 ▇▂▁▁▁

Generate the data for analysis

# Named groups of column names used to select variables for the analysis
keep <- c("outbreak", "home_type", "number_beds")
profile <- c(
  "short_stay", "residents_council", "family_council", "accreditation"
)
location <- c(
  "address", "LHIN", "region", "city", "postal_code",
  "CSDname", "CSDuid", "latitude", "longitude"
)
# NOTE(review): restraints_percent is not part of this group - confirm that
# the omission is intentional.
quality <- paste0(
  c("antipsychotic", "depression", "falls", "pressure_ulcers", "pain"),
  "_percent"
)

# Inspection-count groups: one column per prefix (total, X5y, X2y)
group_prefixes <- c("total", "X5y", "X2y")
all_inspections <- paste0(group_prefixes, "_inspections")
complaints <- paste0(group_prefixes, "_complaints")
critical <- paste0(group_prefixes, "_critical")
noncomplaints <- paste0(group_prefixes, "_noncomplaints")
withOrders <- paste0(group_prefixes, "_withOrders")
cc_ccw <- c(paste0(group_prefixes, "_cc"), paste0(group_prefixes, "_ccw"))

covid <- c(
  "confirmed_resident_cases", "resident_deaths", "confirmed_staff_cases"
)

# Build the analysis data set: outcome and home descriptors, home profile,
# region, quality indicators and every inspection-count group.
# Plain `[` indexing is used instead of subset(): subset() evaluates `select`
# against the column names of df first, so a column sharing a name with one
# of these vectors would silently change the selection.
analysis_cols <- c(
  keep, profile, "region", quality, all_inspections, complaints,
  critical, noncomplaints, withOrders, cc_ccw
)
data <- df[, analysis_cols, drop = FALSE]

Assessing skewness

# Check for right skewness in the numeric predictors
library(e1071)

listofcols <- c(
  "number_beds", quality, all_inspections, complaints,
  noncomplaints, critical, withOrders, cc_ccw
)

# For every predictor: print its name and skewness, then draw a normal Q-Q
# plot with a red reference line
for (col_name in listofcols) {
  col_values <- data[[col_name]]
  print(col_name)
  print(skewness(col_values))
  qqnorm(col_values, main = col_name)
  qqline(col_values, col = "red")
}
## [1] "number_beds"
## [1] 1.510832

## [1] "antipsychotic_percent"
## [1] 0.7192037

## [1] "depression_percent"
## [1] 0.02250263

## [1] "falls_percent"
## [1] 0.1987631

## [1] "pressure_ulcers_percent"
## [1] 0.9435391

## [1] "pain_percent"
## [1] 2.534039

## [1] "total_inspections"
## [1] 1.563433

## [1] "X5y_inspections"
## [1] 1.991566

## [1] "X2y_inspections"
## [1] 1.947724

## [1] "total_complaints"
## [1] 1.716117

## [1] "X5y_complaints"
## [1] 1.927852

## [1] "X2y_complaints"
## [1] 1.984473

## [1] "total_noncomplaints"
## [1] 1.729386

## [1] "X5y_noncomplaints"
## [1] 2.374406

## [1] "X2y_noncomplaints"
## [1] 2.632259

## [1] "total_critical"
## [1] 1.845144

## [1] "X5y_critical"
## [1] 2.089656

## [1] "X2y_critical"
## [1] 2.263202

## [1] "total_withOrders"
## [1] 1.988978

## [1] "X5y_withOrders"
## [1] 2.57025

## [1] "X2y_withOrders"
## [1] 2.199904

## [1] "total_cc"
## [1] 1.66822

## [1] "X5y_cc"
## [1] 1.852097

## [1] "X2y_cc"
## [1] 1.747409

## [1] "total_ccw"
## [1] 1.457927

## [1] "X5y_ccw"
## [1] 1.94936

## [1] "X2y_ccw"
## [1] 1.847578

The output above shows that the inspection counts and number_beds are highly right-skewed, with skewness values greater than 1. The quality indicators are less right-skewed, with skewness values between 0 and 1, except for pain_percent, which has a skewness of about 2.5.

The highly skewed variables will have to be transformed for the analysis. First, however, we check each variable for zero values, because a log transformation cannot be applied to zeros.

# Print summary() for every numeric predictor so that minimum values of zero
# are easy to spot. The analysis data set `data` is inspected (rather than the
# full `df`) because it is the object that is transformed afterwards; the
# selected columns hold the same values in both at this point.
for (col_name in listofcols) {
  print(col_name)
  print(summary(data[[col_name]]))
}
## [1] "number_beds"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    12.0    69.0   120.0   127.3   160.0   543.0 
## [1] "antipsychotic_percent"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   13.45   18.00   18.47   23.00   62.80 
## [1] "depression_percent"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.50   15.25   23.40   23.40   31.40   48.40 
## [1] "falls_percent"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.80   13.35   16.40   16.49   19.70   33.20 
## [1] "pressure_ulcers_percent"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    1.60    2.60    2.74    3.60   11.00 
## [1] "pain_percent"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.950   3.900   5.666   7.500  48.000 
## [1] "total_inspections"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9.00   22.00   32.00   37.68   47.00  172.00 
## [1] "X5y_inspections"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   12.00   17.00   19.82   24.00   86.00 
## [1] "X2y_inspections"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   5.000   8.000   9.195  12.000  44.000 
## [1] "total_complaints"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     7.0    12.0    14.7    20.0    94.0 
## [1] "X5y_complaints"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.000   5.000   6.595   9.000  46.000 
## [1] "X2y_complaints"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.000   3.000   3.195   5.000  20.000 
## [1] "total_noncomplaints"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   15.00   20.00   22.98   28.00   90.00 
## [1] "X5y_noncomplaints"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.00    9.00   11.00   13.23   15.00   68.00 
## [1] "X2y_noncomplaints"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       4       5       6       8      40 
## [1] "total_critical"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    6.50   11.00   12.61   16.00   62.00 
## [1] "X5y_critical"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.000   5.000   6.543   8.000  40.000 
## [1] "X2y_critical"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.000   3.000   3.976   5.000  26.000 
## [1] "total_withOrders"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    3.00    5.00    6.88    9.00   43.00 
## [1] "X5y_withOrders"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.000   3.000   4.407   6.000  34.000 
## [1] "X2y_withOrders"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    1.00    1.85    3.00   16.00 
## [1] "total_cc"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   14.00   23.00   27.32   35.50  153.00 
## [1] "X5y_cc"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    7.00   10.00   13.14   17.00   63.00 
## [1] "X2y_cc"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   4.000   6.000   7.171   9.000  34.000 
## [1] "total_ccw"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     2.0    18.0    28.0    34.2    44.0   165.0 
## [1] "X5y_ccw"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    9.00   15.00   17.54   22.00   93.00 
## [1] "X2y_ccw"
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   5.000   8.000   9.021  12.000  45.000

We see above that the following variables contain zeros:
- antipsychotic_percent
- pressure_ulcers_percent
- pain_percent
- complaints group
- critical group
- withOrders group
- cc group
- X2y_noncomplaints
- X2y_ccw

Explore the variables for transformation

# Numeric variables whose minimum value is 0
list0s <- c(
  "antipsychotic_percent", "pressure_ulcers_percent", "pain_percent",
  complaints, "X2y_noncomplaints", critical, withOrders,
  "total_cc", "X5y_cc", "X2y_cc", "X2y_ccw"
)

library(rcompanion)
# From here on listofcols holds only the variables without zeros
listofcols <- setdiff(listofcols, list0s)

# For each zero-free variable, compare the raw distribution with its square
# root and log transforms
for (col_name in listofcols) {
  raw_values <- data[[col_name]]
  plotNormalHistogram(x = raw_values, main = col_name)
  plotNormalHistogram(x = sqrt(raw_values), main = c(col_name, "sqrt trans"))
  plotNormalHistogram(x = log(raw_values), main = c(col_name, "log trans"))
}

As shown above, a square root transformation seems best for the following data:
- number_beds

A log transformation seems better for the following data:
- all_inspections group
- total_noncomplaints
- X5y_noncomplaints
- total_ccw
- X5y_ccw

# For each variable that contains zeros, compare the raw distribution with
# its square root transform (no log plot is drawn for these variables)
for (col_name in list0s) {
  raw_values <- data[[col_name]]
  plotNormalHistogram(x = raw_values, main = col_name)
  plotNormalHistogram(x = sqrt(raw_values), main = c(col_name, "sqrt trans"))
}

As shown above, in all cases where variables contain 0s, a square root transformation seems to improve the distribution.

Transform the data (log and square root)

# Both transforms overwrite the columns of `data`, so running this chunk a
# second time would transform the values again.

# Log transform in place
log_cols <- c(
  all_inspections, "total_noncomplaints", "X5y_noncomplaints",
  "total_ccw", "X5y_ccw"
)
data[log_cols] <- lapply(data[log_cols], log)

# Square root transform in place (number_beds plus the variables with zeros)
sqrt_cols <- c("number_beds", list0s)
data[sqrt_cols] <- lapply(data[sqrt_cols], sqrt)

skim(data)
Data summary
Name data
Number of rows 615
Number of columns 34
_______________________
Column type frequency:
factor 7
numeric 27
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
outbreak 0 1 FALSE 2 no: 323, yes: 292
home_type 0 1 FALSE 3 For: 351, Non: 164, Mun: 100
short_stay 0 1 FALSE 2 No: 412, Yes: 203
residents_council 0 1 FALSE 2 Yes: 606, No: 9
family_council 0 1 FALSE 2 Yes: 510, No: 105
accreditation 0 1 FALSE 2 Yes: 511, No: 104
region 0 1 FALSE 5 Wes: 233, Eas: 161, Cen: 122, Nor: 63

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
number_beds 0 1 10.84 3.13 3.46 8.31 10.95 12.65 23.30 ▂▇▅▁▁
antipsychotic_percent 0 1 4.19 0.94 0.00 3.67 4.24 4.80 7.92 ▁▂▇▃▁
depression_percent 0 1 23.40 10.09 1.50 15.25 23.40 31.40 48.40 ▃▇▇▇▂
falls_percent 0 1 16.49 4.83 1.80 13.35 16.40 19.70 33.20 ▁▅▇▂▁
pressure_ulcers_percent 0 1 1.58 0.50 0.00 1.26 1.61 1.90 3.32 ▁▃▇▃▁
pain_percent 0 1 2.13 1.07 0.00 1.40 1.97 2.74 6.93 ▃▇▃▁▁
total_inspections 0 1 3.48 0.54 2.20 3.09 3.47 3.85 5.15 ▂▇▇▃▁
X5y_inspections 0 1 2.84 0.52 1.61 2.48 2.83 3.18 4.45 ▂▇▇▃▁
X2y_inspections 0 1 2.06 0.56 0.00 1.61 2.08 2.48 3.78 ▁▂▇▆▁
total_complaints 0 1 3.55 1.44 0.00 2.65 3.46 4.47 9.70 ▂▇▅▁▁
X5y_complaints 0 1 2.31 1.12 0.00 1.41 2.24 3.00 6.78 ▂▇▃▁▁
X2y_complaints 0 1 1.57 0.86 0.00 1.00 1.73 2.24 4.47 ▂▇▅▁▁
total_critical 0 1 3.35 1.19 0.00 2.55 3.32 4.00 7.87 ▁▇▇▂▁
X5y_critical 0 1 2.41 0.85 0.00 1.73 2.24 2.83 6.32 ▁▇▅▁▁
X2y_critical 0 1 1.90 0.61 0.00 1.41 1.73 2.24 5.10 ▂▇▃▁▁
total_noncomplaints 0 1 3.02 0.47 1.61 2.71 3.00 3.33 4.50 ▁▅▇▃▁
X5y_noncomplaints 0 1 2.47 0.45 1.39 2.20 2.40 2.71 4.22 ▂▇▆▂▁
X2y_noncomplaints 0 1 2.36 0.67 0.00 2.00 2.24 2.83 6.32 ▁▇▅▁▁
total_withOrders 0 1 2.37 1.12 0.00 1.73 2.24 3.00 6.56 ▂▇▅▁▁
X5y_withOrders 0 1 1.84 1.00 0.00 1.41 1.73 2.45 5.83 ▅▇▅▁▁
X2y_withOrders 0 1 1.06 0.85 0.00 0.00 1.00 1.73 4.00 ▆▇▃▁▁
total_cc 0 1 4.94 1.70 0.00 3.74 4.80 5.96 12.37 ▁▇▆▁▁
X5y_cc 0 1 3.41 1.24 0.00 2.65 3.16 4.12 7.94 ▁▇▆▂▁
X2y_cc 0 1 2.55 0.82 0.00 2.00 2.45 3.00 5.83 ▁▇▇▂▁
total_ccw 0 1 3.32 0.67 0.69 2.89 3.33 3.78 5.11 ▁▂▇▇▂
X5y_ccw 0 1 2.64 0.69 0.00 2.20 2.71 3.09 4.53 ▁▂▇▇▂
X2y_ccw 0 1 2.86 0.93 0.00 2.24 2.83 3.46 6.71 ▁▇▇▂▁

Inspect the distribution of the categorical variables

# Cross-tabulate each categorical variable against home type
xtabs(~ outbreak + home_type, data = df)
##         home_type
## outbreak For-Profit Municipal Non-Profit
##      no         184        56         83
##      yes        167        44         81
xtabs(~ accreditation + home_type, data = df)
##              home_type
## accreditation For-Profit Municipal Non-Profit
##           No          25        29         50
##           Yes        326        71        114
xtabs(~ short_stay + home_type, data = df)
##           home_type
## short_stay For-Profit Municipal Non-Profit
##        No         248        53        111
##        Yes        103        47         53
xtabs(~ family_council + home_type, data = df)
##               home_type
## family_council For-Profit Municipal Non-Profit
##            No          65        10         30
##            Yes        286        90        134
xtabs(~ residents_council + home_type, data = df)
##                  home_type
## residents_council For-Profit Municipal Non-Profit
##               No           5         0          4
##               Yes        346       100        160
# Cross-tabulate the home profile variables against outbreak status
xtabs(~ residents_council + outbreak, data = df)
##                  outbreak
## residents_council  no yes
##               No    3   6
##               Yes 320 286
xtabs(~ accreditation + outbreak, data = df)
##              outbreak
## accreditation  no yes
##           No   62  42
##           Yes 261 250
xtabs(~ short_stay + outbreak, data = df)
##           outbreak
## short_stay  no yes
##        No  219 193
##        Yes 104  99
xtabs(~ family_council + outbreak, data = df)
##               outbreak
## family_council  no yes
##            No   68  37
##            Yes 255 255
# Cross-tabulate the two council variables against each other
xtabs(~ family_council + residents_council, data = df)
##               residents_council
## family_council  No Yes
##            No    5 100
##            Yes   4 506
# Fit a logistic regression of outbreak on every other column of `data`,
# i.e. on the log / square-root transformed inspection counts together with
# the profile, region and quality variables
fit <- glm(formula = outbreak ~ ., family = binomial, data = data)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ ., family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2487  -0.9295  -0.4374   0.9504   2.1831  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              3.143193   2.840671   1.106   0.2685    
## home_typeMunicipal      -0.533617   0.294532  -1.812   0.0700 .  
## home_typeNon-Profit      0.048595   0.251397   0.193   0.8467    
## number_beds              0.232655   0.050627   4.595 4.32e-06 ***
## short_stayYes           -0.181057   0.210418  -0.860   0.3895    
## residents_councilYes    -0.521201   0.953494  -0.547   0.5846    
## family_councilYes        0.157426   0.267084   0.589   0.5556    
## accreditationYes        -0.236254   0.278384  -0.849   0.3961    
## regionEast              -0.237564   0.324196  -0.733   0.4637    
## regionNorth             -0.946643   0.427283  -2.215   0.0267 *  
## regionToronto            0.689524   0.484719   1.423   0.1549    
## regionWest              -0.511712   0.290713  -1.760   0.0784 .  
## antipsychotic_percent    0.124144   0.105996   1.171   0.2415    
## depression_percent       0.005139   0.010747   0.478   0.6325    
## falls_percent           -0.012270   0.021731  -0.565   0.5723    
## pressure_ulcers_percent -0.012357   0.228484  -0.054   0.9569    
## pain_percent            -0.128315   0.098233  -1.306   0.1915    
## total_inspections       -7.622314   4.436957  -1.718   0.0858 .  
## X5y_inspections          5.059650   3.793593   1.334   0.1823    
## X2y_inspections         -1.082597   1.207296  -0.897   0.3699    
## total_complaints         0.862600   0.978836   0.881   0.3782    
## X5y_complaints           0.087526   0.839495   0.104   0.9170    
## X2y_complaints          -0.267613   0.604419  -0.443   0.6579    
## total_critical          -0.544059   1.194274  -0.456   0.6487    
## X5y_critical             1.538881   1.340180   1.148   0.2509    
## X2y_critical            -1.358774   0.965301  -1.408   0.1592    
## total_noncomplaints      3.844665   2.718140   1.414   0.1572    
## X5y_noncomplaints       -4.774187   2.600318  -1.836   0.0664 .  
## X2y_noncomplaints        1.090983   0.776042   1.406   0.1598    
## total_withOrders        -0.262043   0.346652  -0.756   0.4497    
## X5y_withOrders           0.756159   0.394837   1.915   0.0555 .  
## X2y_withOrders           0.135237   0.422691   0.320   0.7490    
## total_cc                 0.717939   1.517787   0.473   0.6362    
## X5y_cc                  -1.305044   1.571094  -0.831   0.4062    
## X2y_cc                   2.055378   1.531700   1.342   0.1796    
## total_ccw                1.548906   1.813860   0.854   0.3931    
## X5y_ccw                 -0.942757   1.309266  -0.720   0.4715    
## X2y_ccw                 -1.033641   1.096100  -0.943   0.3457    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 695.67  on 577  degrees of freedom
## AIC: 771.67
## 
## Number of Fisher Scoring iterations: 4

Backwards selection

# Backward step: refit without pressure_ulcers_percent, which had the largest
# p-value (0.9569) in the full model
fit <- update(fit, formula. = . ~ . - pressure_ulcers_percent)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay + 
##     residents_council + family_council + accreditation + region + 
##     antipsychotic_percent + depression_percent + falls_percent + 
##     pain_percent + total_inspections + X5y_inspections + X2y_inspections + 
##     total_complaints + X5y_complaints + X2y_complaints + total_critical + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + total_withOrders + X5y_withOrders + X2y_withOrders + 
##     total_cc + X5y_cc + X2y_cc + total_ccw + X5y_ccw + X2y_ccw, 
##     family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2486  -0.9255  -0.4352   0.9507   2.1813  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            3.13019    2.83094   1.106   0.2689    
## home_typeMunicipal    -0.53409    0.29440  -1.814   0.0696 .  
## home_typeNon-Profit    0.04886    0.25133   0.194   0.8458    
## number_beds            0.23273    0.05061   4.598 4.26e-06 ***
## short_stayYes         -0.18205    0.20962  -0.868   0.3851    
## residents_councilYes  -0.52076    0.95282  -0.547   0.5847    
## family_councilYes      0.15860    0.26620   0.596   0.5513    
## accreditationYes      -0.23458    0.27665  -0.848   0.3965    
## regionEast            -0.23736    0.32417  -0.732   0.4640    
## regionNorth           -0.94476    0.42591  -2.218   0.0265 *  
## regionToronto          0.68882    0.48450   1.422   0.1551    
## regionWest            -0.51201    0.29065  -1.762   0.0781 .  
## antipsychotic_percent  0.12318    0.10450   1.179   0.2385    
## depression_percent     0.00499    0.01039   0.480   0.6309    
## falls_percent         -0.01241    0.02158  -0.575   0.5653    
## pain_percent          -0.12866    0.09802  -1.313   0.1893    
## total_inspections     -7.61806    4.43813  -1.717   0.0861 .  
## X5y_inspections        5.05227    3.79136   1.333   0.1827    
## X2y_inspections       -1.08154    1.20721  -0.896   0.3703    
## total_complaints       0.86025    0.97812   0.879   0.3791    
## X5y_complaints         0.08803    0.83947   0.105   0.9165    
## X2y_complaints        -0.26833    0.60424  -0.444   0.6570    
## total_critical        -0.54448    1.19461  -0.456   0.6485    
## X5y_critical           1.53645    1.33935   1.147   0.2513    
## X2y_critical          -1.35840    0.96522  -1.407   0.1593    
## total_noncomplaints    3.84332    2.71878   1.414   0.1575    
## X5y_noncomplaints     -4.77013    2.59908  -1.835   0.0665 .  
## X2y_noncomplaints      1.08974    0.77560   1.405   0.1600    
## total_withOrders      -0.26317    0.34604  -0.761   0.4469    
## X5y_withOrders         0.75559    0.39471   1.914   0.0556 .  
## X2y_withOrders         0.13512    0.42277   0.320   0.7493    
## total_cc               0.71902    1.51816   0.474   0.6358    
## X5y_cc                -1.30328    1.57069  -0.830   0.4067    
## X2y_cc                 2.05514    1.53186   1.342   0.1797    
## total_ccw              1.55105    1.81438   0.855   0.3926    
## X5y_ccw               -0.94317    1.30954  -0.720   0.4714    
## X2y_ccw               -1.03245    1.09623  -0.942   0.3463    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 695.68  on 578  degrees of freedom
## AIC: 769.68
## 
## Number of Fisher Scoring iterations: 4
# Backward step: refit without X5y_complaints, which had the largest p-value
# (0.9165) in the previous fit
fit <- update(fit, formula. = . ~ . - X5y_complaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay + 
##     residents_council + family_council + accreditation + region + 
##     antipsychotic_percent + depression_percent + falls_percent + 
##     pain_percent + total_inspections + X5y_inspections + X2y_inspections + 
##     total_complaints + X2y_complaints + total_critical + X5y_critical + 
##     X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + total_withOrders + X5y_withOrders + X2y_withOrders + 
##     total_cc + X5y_cc + X2y_cc + total_ccw + X5y_ccw + X2y_ccw, 
##     family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2466  -0.9274  -0.4355   0.9514   2.1806  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            3.092761   2.807636   1.102   0.2707    
## home_typeMunicipal    -0.534879   0.294283  -1.818   0.0691 .  
## home_typeNon-Profit    0.048687   0.251311   0.194   0.8464    
## number_beds            0.232734   0.050602   4.599 4.24e-06 ***
## short_stayYes         -0.180455   0.209066  -0.863   0.3881    
## residents_councilYes  -0.521754   0.953067  -0.547   0.5841    
## family_councilYes      0.159551   0.266011   0.600   0.5486    
## accreditationYes      -0.234674   0.276638  -0.848   0.3963    
## regionEast            -0.236248   0.324019  -0.729   0.4659    
## regionNorth           -0.944141   0.425852  -2.217   0.0266 *  
## regionToronto          0.686985   0.483802   1.420   0.1556    
## regionWest            -0.509040   0.289270  -1.760   0.0785 .  
## antipsychotic_percent  0.123557   0.104466   1.183   0.2369    
## depression_percent     0.004998   0.010382   0.481   0.6302    
## falls_percent         -0.012512   0.021555  -0.580   0.5616    
## pain_percent          -0.129577   0.097639  -1.327   0.1845    
## total_inspections     -7.635450   4.434500  -1.722   0.0851 .  
## X5y_inspections        5.094915   3.765898   1.353   0.1761    
## X2y_inspections       -1.105348   1.185249  -0.933   0.3510    
## total_complaints       0.889025   0.937585   0.948   0.3430    
## X2y_complaints        -0.232821   0.500932  -0.465   0.6421    
## total_critical        -0.520950   1.173006  -0.444   0.6570    
## X5y_critical           1.458860   1.114064   1.309   0.1904    
## X2y_critical          -1.326826   0.917013  -1.447   0.1479    
## total_noncomplaints    3.856397   2.715546   1.420   0.1556    
## X5y_noncomplaints     -4.808255   2.570222  -1.871   0.0614 .  
## X2y_noncomplaints      1.104618   0.762395   1.449   0.1474    
## total_withOrders      -0.265594   0.345295  -0.769   0.4418    
## X5y_withOrders         0.755039   0.394477   1.914   0.0556 .  
## X2y_withOrders         0.133755   0.422690   0.316   0.7517    
## total_cc               0.676429   1.462093   0.463   0.6436    
## X5y_cc                -1.191395   1.151383  -1.035   0.3008    
## X2y_cc                 2.012299   1.476688   1.363   0.1730    
## total_ccw              1.573021   1.802839   0.873   0.3829    
## X5y_ccw               -0.936432   1.307462  -0.716   0.4739    
## X2y_ccw               -1.033976   1.096578  -0.943   0.3457    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 695.69  on 579  degrees of freedom
## AIC: 767.69
## 
## Number of Fisher Scoring iterations: 4
# Backward step: refit without total_critical (p = 0.6570 in the previous fit)
# NOTE(review): X2y_withOrders had a higher p-value (0.7517) in that fit -
# confirm that dropping total_critical first is intended.
fit <- update(fit, formula. = . ~ . - total_critical)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay + 
##     residents_council + family_council + accreditation + region + 
##     antipsychotic_percent + depression_percent + falls_percent + 
##     pain_percent + total_inspections + X5y_inspections + X2y_inspections + 
##     total_complaints + X2y_complaints + X5y_critical + X2y_critical + 
##     total_noncomplaints + X5y_noncomplaints + X2y_noncomplaints + 
##     total_withOrders + X5y_withOrders + X2y_withOrders + total_cc + 
##     X5y_cc + X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2667  -0.9303  -0.4311   0.9567   2.2094  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.878176   2.775590   1.037   0.2998    
## home_typeMunicipal    -0.531639   0.294213  -1.807   0.0708 .  
## home_typeNon-Profit    0.055481   0.250986   0.221   0.8251    
## number_beds            0.229874   0.050170   4.582 4.61e-06 ***
## short_stayYes         -0.175316   0.208647  -0.840   0.4008    
## residents_councilYes  -0.540071   0.958702  -0.563   0.5732    
## family_councilYes      0.156236   0.265851   0.588   0.5567    
## accreditationYes      -0.237742   0.276805  -0.859   0.3904    
## regionEast            -0.230775   0.323602  -0.713   0.4758    
## regionNorth           -0.932191   0.424930  -2.194   0.0283 *  
## regionToronto          0.676336   0.482319   1.402   0.1608    
## regionWest            -0.499104   0.288146  -1.732   0.0833 .  
## antipsychotic_percent  0.123328   0.104553   1.180   0.2382    
## depression_percent     0.005069   0.010366   0.489   0.6248    
## falls_percent         -0.012717   0.021541  -0.590   0.5550    
## pain_percent          -0.127711   0.097558  -1.309   0.1905    
## total_inspections     -6.462434   3.554181  -1.818   0.0690 .  
## X5y_inspections        4.472021   3.479202   1.285   0.1987    
## X2y_inspections       -1.085330   1.182591  -0.918   0.3587    
## total_complaints       1.171973   0.712002   1.646   0.0998 .  
## X2y_complaints        -0.266187   0.495383  -0.537   0.5910    
## X5y_critical           1.247180   1.001368   1.245   0.2130    
## X2y_critical          -1.337686   0.917922  -1.457   0.1450    
## total_noncomplaints    3.060035   2.036657   1.502   0.1330    
## X5y_noncomplaints     -4.329196   2.319221  -1.867   0.0619 .  
## X2y_noncomplaints      1.091251   0.761249   1.434   0.1517    
## total_withOrders      -0.202373   0.313578  -0.645   0.5187    
## X5y_withOrders         0.707450   0.378232   1.870   0.0614 .  
## X2y_withOrders         0.131363   0.424618   0.309   0.7570    
## total_cc               0.081704   0.603343   0.135   0.8923    
## X5y_cc                -1.025557   1.082918  -0.947   0.3436    
## X2y_cc                 2.029400   1.479307   1.372   0.1701    
## total_ccw              1.216711   1.605697   0.758   0.4486    
## X5y_ccw               -0.777883   1.255114  -0.620   0.5354    
## X2y_ccw               -1.026584   1.101423  -0.932   0.3513    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 695.88  on 580  degrees of freedom
## AIC: 765.88
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `total_cc` removed (Wald p = 0.892, the largest
# in the preceding fit), then print the updated coefficient table.
fit <- update(fit, formula. = . ~ . - total_cc)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay + 
##     residents_council + family_council + accreditation + region + 
##     antipsychotic_percent + depression_percent + falls_percent + 
##     pain_percent + total_inspections + X5y_inspections + X2y_inspections + 
##     total_complaints + X2y_complaints + X5y_critical + X2y_critical + 
##     total_noncomplaints + X5y_noncomplaints + X2y_noncomplaints + 
##     total_withOrders + X5y_withOrders + X2y_withOrders + X5y_cc + 
##     X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2603  -0.9337  -0.4337   0.9592   2.2207  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.804341   2.723947   1.030   0.3032    
## home_typeMunicipal    -0.529274   0.293704  -1.802   0.0715 .  
## home_typeNon-Profit    0.056724   0.250802   0.226   0.8211    
## number_beds            0.230246   0.050091   4.597 4.29e-06 ***
## short_stayYes         -0.175140   0.208618  -0.840   0.4012    
## residents_councilYes  -0.547391   0.958233  -0.571   0.5678    
## family_councilYes      0.154790   0.265644   0.583   0.5601    
## accreditationYes      -0.236160   0.276599  -0.854   0.3932    
## regionEast            -0.234281   0.322581  -0.726   0.4677    
## regionNorth           -0.932893   0.425019  -2.195   0.0282 *  
## regionToronto          0.679489   0.481844   1.410   0.1585    
## regionWest            -0.499007   0.288139  -1.732   0.0833 .  
## antipsychotic_percent  0.123678   0.104529   1.183   0.2367    
## depression_percent     0.004994   0.010350   0.483   0.6294    
## falls_percent         -0.012711   0.021543  -0.590   0.5552    
## pain_percent          -0.127622   0.097544  -1.308   0.1908    
## total_inspections     -6.594359   3.425477  -1.925   0.0542 .  
## X5y_inspections        4.487615   3.481494   1.289   0.1974    
## X2y_inspections       -1.087349   1.182821  -0.919   0.3579    
## total_complaints       1.233039   0.552954   2.230   0.0258 *  
## X2y_complaints        -0.274842   0.491417  -0.559   0.5760    
## X5y_critical           1.270735   0.987488   1.287   0.1982    
## X2y_critical          -1.345058   0.916660  -1.467   0.1423    
## total_noncomplaints    3.210968   1.707720   1.880   0.0601 .  
## X5y_noncomplaints     -4.368816   2.303622  -1.896   0.0579 .  
## X2y_noncomplaints      1.085098   0.759714   1.428   0.1532    
## total_withOrders      -0.230660   0.233867  -0.986   0.3240    
## X5y_withOrders         0.724211   0.357667   2.025   0.0429 *  
## X2y_withOrders         0.130018   0.424585   0.306   0.7594    
## X5y_cc                -1.007757   1.077053  -0.936   0.3494    
## X2y_cc                 2.034773   1.479350   1.375   0.1690    
## total_ccw              1.329467   1.375444   0.967   0.3338    
## X5y_ccw               -0.827639   1.200815  -0.689   0.4907    
## X2y_ccw               -1.016332   1.098684  -0.925   0.3549    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 695.90  on 581  degrees of freedom
## AIC: 763.9
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `X2y_withOrders` removed (Wald p = 0.759 in the
# preceding fit), then print the updated coefficient table.
fit <- update(fit, formula. = . ~ . - X2y_withOrders)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay + 
##     residents_council + family_council + accreditation + region + 
##     antipsychotic_percent + depression_percent + falls_percent + 
##     pain_percent + total_inspections + X5y_inspections + X2y_inspections + 
##     total_complaints + X2y_complaints + X5y_critical + X2y_critical + 
##     total_noncomplaints + X5y_noncomplaints + X2y_noncomplaints + 
##     total_withOrders + X5y_withOrders + X5y_cc + X2y_cc + total_ccw + 
##     X5y_ccw + X2y_ccw, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2564  -0.9327  -0.4296   0.9591   2.2380  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.700383   2.695570   1.002   0.3164    
## home_typeMunicipal    -0.533489   0.293520  -1.818   0.0691 .  
## home_typeNon-Profit    0.060137   0.250297   0.240   0.8101    
## number_beds            0.232431   0.049612   4.685  2.8e-06 ***
## short_stayYes         -0.174508   0.208632  -0.836   0.4029    
## residents_councilYes  -0.545697   0.951764  -0.573   0.5664    
## family_councilYes      0.153696   0.265543   0.579   0.5627    
## accreditationYes      -0.234102   0.276300  -0.847   0.3968    
## regionEast            -0.227700   0.321645  -0.708   0.4790    
## regionNorth           -0.933344   0.425219  -2.195   0.0282 *  
## regionToronto          0.681302   0.481733   1.414   0.1573    
## regionWest            -0.499361   0.288159  -1.733   0.0831 .  
## antipsychotic_percent  0.124103   0.104345   1.189   0.2343    
## depression_percent     0.005011   0.010349   0.484   0.6282    
## falls_percent         -0.012767   0.021551  -0.592   0.5536    
## pain_percent          -0.124983   0.097147  -1.287   0.1983    
## total_inspections     -6.473664   3.396887  -1.906   0.0567 .  
## X5y_inspections        4.564285   3.476293   1.313   0.1892    
## X2y_inspections       -1.073210   1.182755  -0.907   0.3642    
## total_complaints       1.218242   0.549669   2.216   0.0267 *  
## X2y_complaints        -0.268742   0.491294  -0.547   0.5844    
## X5y_critical           1.296286   0.985445   1.315   0.1884    
## X2y_critical          -1.316457   0.911011  -1.445   0.1484    
## total_noncomplaints    3.148178   1.692015   1.861   0.0628 .  
## X5y_noncomplaints     -4.477747   2.279802  -1.964   0.0495 *  
## X2y_noncomplaints      1.090560   0.759593   1.436   0.1511    
## total_withOrders      -0.228506   0.233684  -0.978   0.3282    
## X5y_withOrders         0.756031   0.342283   2.209   0.0272 *  
## X5y_cc                -1.017847   1.079033  -0.943   0.3455    
## X2y_cc                 1.755008   1.161371   1.511   0.1307    
## total_ccw              1.297022   1.371323   0.946   0.3442    
## X5y_ccw               -0.864496   1.194333  -0.724   0.4692    
## X2y_ccw               -0.735698   0.602462  -1.221   0.2220    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 696.00  on 582  degrees of freedom
## AIC: 762
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `depression_percent` removed (Wald p = 0.628 in
# the preceding fit), then print the updated coefficient table.
fit <- update(fit, formula. = . ~ . - depression_percent)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay + 
##     residents_council + family_council + accreditation + region + 
##     antipsychotic_percent + falls_percent + pain_percent + total_inspections + 
##     X5y_inspections + X2y_inspections + total_complaints + X2y_complaints + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc + 
##     X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2230  -0.9210  -0.4355   0.9517   2.2353  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.77184    2.69190   1.030   0.3032    
## home_typeMunicipal    -0.50599    0.28777  -1.758   0.0787 .  
## home_typeNon-Profit    0.07516    0.24835   0.303   0.7622    
## number_beds            0.22927    0.04911   4.669 3.03e-06 ***
## short_stayYes         -0.17413    0.20871  -0.834   0.4041    
## residents_councilYes  -0.55367    0.95543  -0.580   0.5623    
## family_councilYes      0.15128    0.26538   0.570   0.5686    
## accreditationYes      -0.23268    0.27635  -0.842   0.3998    
## regionEast            -0.20989    0.31967  -0.657   0.5114    
## regionNorth           -0.92830    0.42417  -2.189   0.0286 *  
## regionToronto          0.66815    0.48067   1.390   0.1645    
## regionWest            -0.47002    0.28156  -1.669   0.0950 .  
## antipsychotic_percent  0.12558    0.10422   1.205   0.2282    
## falls_percent         -0.01082    0.02113  -0.512   0.6084    
## pain_percent          -0.12319    0.09704  -1.269   0.2043    
## total_inspections     -6.46720    3.39713  -1.904   0.0569 .  
## X5y_inspections        4.60174    3.47142   1.326   0.1850    
## X2y_inspections       -1.05464    1.18263  -0.892   0.3725    
## total_complaints       1.20207    0.54827   2.192   0.0283 *  
## X2y_complaints        -0.26575    0.49140  -0.541   0.5886    
## X5y_critical           1.30739    0.98281   1.330   0.1834    
## X2y_critical          -1.29210    0.91040  -1.419   0.1558    
## total_noncomplaints    3.12744    1.69193   1.848   0.0645 .  
## X5y_noncomplaints     -4.50857    2.27522  -1.982   0.0475 *  
## X2y_noncomplaints      1.07269    0.75955   1.412   0.1579    
## total_withOrders      -0.22710    0.23346  -0.973   0.3307    
## X5y_withOrders         0.75939    0.34198   2.221   0.0264 *  
## X5y_cc                -1.00717    1.07745  -0.935   0.3499    
## X2y_cc                 1.71929    1.15910   1.483   0.1380    
## total_ccw              1.32952    1.36867   0.971   0.3314    
## X5y_ccw               -0.89570    1.19166  -0.752   0.4523    
## X2y_ccw               -0.72310    0.60110  -1.203   0.2290    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 696.23  on 583  degrees of freedom
## AIC: 760.23
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `falls_percent` removed (Wald p = 0.608 in the
# preceding fit), then print the updated coefficient table.
fit <- update(fit, formula. = . ~ . - falls_percent)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay + 
##     residents_council + family_council + accreditation + region + 
##     antipsychotic_percent + pain_percent + total_inspections + 
##     X5y_inspections + X2y_inspections + total_complaints + X2y_complaints + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc + 
##     X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2380  -0.9292  -0.4446   0.9558   2.2335  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.68485    2.68461   1.000   0.3173    
## home_typeMunicipal    -0.51452    0.28715  -1.792   0.0732 .  
## home_typeNon-Profit    0.08527    0.24738   0.345   0.7303    
## number_beds            0.22905    0.04907   4.667 3.05e-06 ***
## short_stayYes         -0.17441    0.20870  -0.836   0.4033    
## residents_councilYes  -0.55241    0.95739  -0.577   0.5639    
## family_councilYes      0.14919    0.26545   0.562   0.5741    
## accreditationYes      -0.22594    0.27599  -0.819   0.4130    
## regionEast            -0.21033    0.31912  -0.659   0.5098    
## regionNorth           -0.91168    0.42321  -2.154   0.0312 *  
## regionToronto          0.70790    0.47487   1.491   0.1360    
## regionWest            -0.47757    0.28104  -1.699   0.0893 .  
## antipsychotic_percent  0.11939    0.10357   1.153   0.2490    
## pain_percent          -0.12982    0.09639  -1.347   0.1780    
## total_inspections     -6.51963    3.39254  -1.922   0.0546 .  
## X5y_inspections        4.63844    3.45924   1.341   0.1800    
## X2y_inspections       -1.06372    1.18211  -0.900   0.3682    
## total_complaints       1.20317    0.54832   2.194   0.0282 *  
## X2y_complaints        -0.26489    0.49151  -0.539   0.5899    
## X5y_critical           1.31296    0.97840   1.342   0.1796    
## X2y_critical          -1.29520    0.91045  -1.423   0.1549    
## total_noncomplaints    3.14885    1.69032   1.863   0.0625 .  
## X5y_noncomplaints     -4.55939    2.26329  -2.014   0.0440 *  
## X2y_noncomplaints      1.07998    0.75848   1.424   0.1545    
## total_withOrders      -0.23397    0.23314  -1.004   0.3156    
## X5y_withOrders         0.76615    0.34151   2.243   0.0249 *  
## X5y_cc                -1.00972    1.07417  -0.940   0.3472    
## X2y_cc                 1.72565    1.15882   1.489   0.1364    
## total_ccw              1.36840    1.36440   1.003   0.3159    
## X5y_ccw               -0.90858    1.18950  -0.764   0.4450    
## X2y_ccw               -0.73282    0.60057  -1.220   0.2224    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 696.49  on 584  degrees of freedom
## AIC: 758.49
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `X2y_complaints` removed (Wald p = 0.590 in the
# preceding fit), then print the updated coefficient table.
fit <- update(fit, formula. = . ~ . - X2y_complaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay + 
##     residents_council + family_council + accreditation + region + 
##     antipsychotic_percent + pain_percent + total_inspections + 
##     X5y_inspections + X2y_inspections + total_complaints + X5y_critical + 
##     X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc + 
##     X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2408  -0.9284  -0.4264   0.9466   2.2079  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.70022    2.68973   1.004   0.3154    
## home_typeMunicipal    -0.51859    0.28681  -1.808   0.0706 .  
## home_typeNon-Profit    0.09288    0.24706   0.376   0.7070    
## number_beds            0.22716    0.04891   4.645 3.41e-06 ***
## short_stayYes         -0.17183    0.20859  -0.824   0.4101    
## residents_councilYes  -0.55943    0.95964  -0.583   0.5599    
## family_councilYes      0.15559    0.26536   0.586   0.5577    
## accreditationYes      -0.23562    0.27537  -0.856   0.3922    
## regionEast            -0.20664    0.31876  -0.648   0.5168    
## regionNorth           -0.92449    0.42277  -2.187   0.0288 *  
## regionToronto          0.71427    0.47492   1.504   0.1326    
## regionWest            -0.48419    0.28064  -1.725   0.0845 .  
## antipsychotic_percent  0.12110    0.10340   1.171   0.2415    
## pain_percent          -0.12996    0.09649  -1.347   0.1780    
## total_inspections     -6.36560    3.38369  -1.881   0.0599 .  
## X5y_inspections        4.40330    3.44056   1.280   0.2006    
## X2y_inspections       -1.18816    1.15711  -1.027   0.3045    
## total_complaints       1.16791    0.54524   2.142   0.0322 *  
## X5y_critical           1.23688    0.97128   1.273   0.2029    
## X2y_critical          -1.00602    0.73034  -1.377   0.1684    
## total_noncomplaints    3.08693    1.68784   1.829   0.0674 .  
## X5y_noncomplaints     -4.40094    2.24900  -1.957   0.0504 .  
## X2y_noncomplaints      1.14661    0.74676   1.535   0.1247    
## total_withOrders      -0.23777    0.23268  -1.022   0.3068    
## X5y_withOrders         0.76294    0.34117   2.236   0.0253 *  
## X5y_cc                -0.93633    1.06922  -0.876   0.3812    
## X2y_cc                 1.37304    0.95329   1.440   0.1498    
## total_ccw              1.36559    1.36139   1.003   0.3158    
## X5y_ccw               -0.86630    1.18779  -0.729   0.4658    
## X2y_ccw               -0.75421    0.59871  -1.260   0.2078    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 696.78  on 585  degrees of freedom
## AIC: 756.78
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `residents_council` removed (Wald p = 0.560 in
# the preceding fit), then print the updated coefficient table.
fit <- update(fit, formula. = . ~ . - residents_council)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay + 
##     family_council + accreditation + region + antipsychotic_percent + 
##     pain_percent + total_inspections + X5y_inspections + X2y_inspections + 
##     total_complaints + X5y_critical + X2y_critical + total_noncomplaints + 
##     X5y_noncomplaints + X2y_noncomplaints + total_withOrders + 
##     X5y_withOrders + X5y_cc + X2y_cc + total_ccw + X5y_ccw + 
##     X2y_ccw, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0221  -0.9313  -0.4281   0.9548   2.2070  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.34095    2.62083   0.893   0.3717    
## home_typeMunicipal    -0.52684    0.28638  -1.840   0.0658 .  
## home_typeNon-Profit    0.09582    0.24662   0.389   0.6976    
## number_beds            0.22857    0.04884   4.680 2.87e-06 ***
## short_stayYes         -0.17836    0.20836  -0.856   0.3920    
## family_councilYes      0.14207    0.26349   0.539   0.5898    
## accreditationYes      -0.25099    0.27382  -0.917   0.3593    
## regionEast            -0.20881    0.31851  -0.656   0.5121    
## regionNorth           -0.92023    0.42321  -2.174   0.0297 *  
## regionToronto          0.70301    0.47449   1.482   0.1384    
## regionWest            -0.48112    0.28029  -1.717   0.0861 .  
## antipsychotic_percent  0.12472    0.10348   1.205   0.2281    
## pain_percent          -0.13262    0.09636  -1.376   0.1688    
## total_inspections     -6.61085    3.36414  -1.965   0.0494 *  
## X5y_inspections        4.58554    3.42675   1.338   0.1808    
## X2y_inspections       -1.12016    1.15367  -0.971   0.3316    
## total_complaints       1.19999    0.54422   2.205   0.0275 *  
## X5y_critical           1.29858    0.96688   1.343   0.1793    
## X2y_critical          -0.99339    0.73004  -1.361   0.1736    
## total_noncomplaints    3.16244    1.68612   1.876   0.0607 .  
## X5y_noncomplaints     -4.51485    2.24214  -2.014   0.0440 *  
## X2y_noncomplaints      1.11827    0.74592   1.499   0.1338    
## total_withOrders      -0.23648    0.23275  -1.016   0.3096    
## X5y_withOrders         0.77367    0.34132   2.267   0.0234 *  
## X5y_cc                -0.96161    1.07026  -0.898   0.3689    
## X2y_cc                 1.30754    0.94794   1.379   0.1678    
## total_ccw              1.43003    1.35655   1.054   0.2918    
## X5y_ccw               -0.94048    1.17967  -0.797   0.4253    
## X2y_ccw               -0.73721    0.59859  -1.232   0.2181    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 697.14  on 586  degrees of freedom
## AIC: 755.14
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `short_stay` removed (Wald p = 0.392 in the
# preceding fit), then print the updated coefficient table.
# NOTE(review): family_council (p = 0.590) and X5y_ccw (p = 0.425) were less
# significant in that fit yet are kept here -- confirm this is deliberate.
fit <- update(fit, formula. = . ~ . - short_stay)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     accreditation + region + antipsychotic_percent + pain_percent + 
##     total_inspections + X5y_inspections + X2y_inspections + total_complaints + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc + 
##     X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0332  -0.9223  -0.4290   0.9560   2.2214  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.43153    2.61469   0.930   0.3524    
## home_typeMunicipal    -0.54425    0.28569  -1.905   0.0568 .  
## home_typeNon-Profit    0.09104    0.24630   0.370   0.7117    
## number_beds            0.22318    0.04841   4.610 4.02e-06 ***
## family_councilYes      0.14100    0.26358   0.535   0.5927    
## accreditationYes      -0.26201    0.27349  -0.958   0.3381    
## regionEast            -0.21343    0.31876  -0.670   0.5031    
## regionNorth           -0.94946    0.42105  -2.255   0.0241 *  
## regionToronto          0.68619    0.47519   1.444   0.1487    
## regionWest            -0.47474    0.27996  -1.696   0.0899 .  
## antipsychotic_percent  0.12357    0.10351   1.194   0.2326    
## pain_percent          -0.13585    0.09625  -1.411   0.1581    
## total_inspections     -6.65872    3.35491  -1.985   0.0472 *  
## X5y_inspections        4.53577    3.41497   1.328   0.1841    
## X2y_inspections       -1.10037    1.15078  -0.956   0.3390    
## total_complaints       1.22006    0.54252   2.249   0.0245 *  
## X5y_critical           1.26720    0.96436   1.314   0.1888    
## X2y_critical          -0.94208    0.72653  -1.297   0.1947    
## total_noncomplaints    3.22708    1.68026   1.921   0.0548 .  
## X5y_noncomplaints     -4.46926    2.23469  -2.000   0.0455 *  
## X2y_noncomplaints      1.07155    0.74189   1.444   0.1486    
## total_withOrders      -0.22538    0.23203  -0.971   0.3314    
## X5y_withOrders         0.77714    0.34108   2.278   0.0227 *  
## X5y_cc                -0.94166    1.06740  -0.882   0.3777    
## X2y_cc                 1.30188    0.94607   1.376   0.1688    
## total_ccw              1.37564    1.35515   1.015   0.3100    
## X5y_ccw               -0.93057    1.17959  -0.789   0.4302    
## X2y_ccw               -0.74280    0.59771  -1.243   0.2140    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 697.87  on 587  degrees of freedom
## AIC: 753.87
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `X5y_ccw` removed (Wald p = 0.430 in the
# preceding fit), then print the updated coefficient table.
# NOTE(review): family_council (p = 0.593) was less significant in that fit
# but is retained -- confirm this is deliberate.
fit <- update(fit, formula. = . ~ . - X5y_ccw)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     accreditation + region + antipsychotic_percent + pain_percent + 
##     total_inspections + X5y_inspections + X2y_inspections + total_complaints + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc + 
##     X2y_cc + total_ccw + X2y_ccw, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0395  -0.9261  -0.4417   0.9620   2.2337  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.67816    2.59524   1.032   0.3021    
## home_typeMunicipal    -0.52996    0.28495  -1.860   0.0629 .  
## home_typeNon-Profit    0.08109    0.24592   0.330   0.7416    
## number_beds            0.22237    0.04830   4.604 4.15e-06 ***
## family_councilYes      0.16265    0.26192   0.621   0.5346    
## accreditationYes      -0.25758    0.27276  -0.944   0.3450    
## regionEast            -0.20441    0.31851  -0.642   0.5210    
## regionNorth           -0.92949    0.42029  -2.212   0.0270 *  
## regionToronto          0.71512    0.47493   1.506   0.1321    
## regionWest            -0.48396    0.27963  -1.731   0.0835 .  
## antipsychotic_percent  0.11826    0.10287   1.150   0.2503    
## pain_percent          -0.14055    0.09584  -1.467   0.1425    
## total_inspections     -5.94024    3.21699  -1.847   0.0648 .  
## X5y_inspections        2.96074    2.74133   1.080   0.2801    
## X2y_inspections       -1.36052    1.10071  -1.236   0.2164    
## total_complaints       1.22676    0.54218   2.263   0.0237 *  
## X5y_critical           1.01173    0.89954   1.125   0.2607    
## X2y_critical          -1.10173    0.69904  -1.576   0.1150    
## total_noncomplaints    3.11111    1.67023   1.863   0.0625 .  
## X5y_noncomplaints     -3.71594    1.99628  -1.861   0.0627 .  
## X2y_noncomplaints      1.25966    0.70269   1.793   0.0730 .  
## total_withOrders      -0.19056    0.22776  -0.837   0.4028    
## X5y_withOrders         0.63175    0.28416   2.223   0.0262 *  
## X5y_cc                -0.80003    1.04376  -0.766   0.4434    
## X2y_cc                 1.51841    0.90623   1.676   0.0938 .  
## total_ccw              0.79024    1.12149   0.705   0.4810    
## X2y_ccw               -0.79088    0.59374  -1.332   0.1829    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 698.49  on 588  degrees of freedom
## AIC: 752.49
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `total_ccw` removed (Wald p = 0.481 in the
# preceding fit), then print the updated coefficient table.
# NOTE(review): family_council (p = 0.535) was less significant in that fit
# but is retained -- confirm this is deliberate.
fit <- update(fit, formula. = . ~ . - total_ccw)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     accreditation + region + antipsychotic_percent + pain_percent + 
##     total_inspections + X5y_inspections + X2y_inspections + total_complaints + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc + 
##     X2y_cc + X2y_ccw, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0018  -0.9220  -0.4382   0.9549   2.2129  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            1.80939    2.26973   0.797   0.4253    
## home_typeMunicipal    -0.52533    0.28506  -1.843   0.0653 .  
## home_typeNon-Profit    0.06322    0.24423   0.259   0.7957    
## number_beds            0.22766    0.04777   4.766 1.88e-06 ***
## family_councilYes      0.16338    0.26153   0.625   0.5322    
## accreditationYes      -0.25514    0.27228  -0.937   0.3487    
## regionEast            -0.20687    0.31817  -0.650   0.5156    
## regionNorth           -0.91735    0.42016  -2.183   0.0290 *  
## regionToronto          0.67056    0.46894   1.430   0.1527    
## regionWest            -0.47206    0.27884  -1.693   0.0905 .  
## antipsychotic_percent  0.12103    0.10281   1.177   0.2391    
## pain_percent          -0.13953    0.09562  -1.459   0.1445    
## total_inspections     -4.43748    2.39363  -1.854   0.0638 .  
## X5y_inspections        2.94857    2.73987   1.076   0.2818    
## X2y_inspections       -1.17819    1.06654  -1.105   0.2693    
## total_complaints       1.11110    0.51307   2.166   0.0303 *  
## X5y_critical           1.11279    0.88943   1.251   0.2109    
## X2y_critical          -1.02568    0.68789  -1.491   0.1359    
## total_noncomplaints    2.65824    1.53338   1.734   0.0830 .  
## X5y_noncomplaints     -3.86366    1.98644  -1.945   0.0518 .  
## X2y_noncomplaints      1.12484    0.67410   1.669   0.0952 .  
## total_withOrders      -0.14713    0.21946  -0.670   0.5026    
## X5y_withOrders         0.65681    0.28191   2.330   0.0198 *  
## X5y_cc                -0.81335    1.04359  -0.779   0.4358    
## X2y_cc                 1.37845    0.88297   1.561   0.1185    
## X2y_ccw               -0.72918    0.58671  -1.243   0.2139    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 698.99  on 589  degrees of freedom
## AIC: 750.99
## 
## Number of Fisher Scoring iterations: 4
# Refit the binomial GLM with `total_withOrders` removed (Wald p = 0.503 in
# the preceding fit), then print the updated coefficient table.
# NOTE(review): family_council (p = 0.532) was less significant in that fit
# but is retained -- confirm this is deliberate.
fit <- update(fit, formula. = . ~ . - total_withOrders)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     accreditation + region + antipsychotic_percent + pain_percent + 
##     total_inspections + X5y_inspections + X2y_inspections + total_complaints + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + X5y_withOrders + X5y_cc + X2y_cc + X2y_ccw, 
##     family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9696  -0.9239  -0.4421   0.9624   2.2042  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.04860    2.24243   0.914  0.36095    
## home_typeMunicipal    -0.51583    0.28446  -1.813  0.06977 .  
## home_typeNon-Profit    0.05386    0.24395   0.221  0.82527    
## number_beds            0.22821    0.04780   4.774  1.8e-06 ***
## family_councilYes      0.16993    0.26120   0.651  0.51533    
## accreditationYes      -0.25533    0.27227  -0.938  0.34835    
## regionEast            -0.21046    0.31779  -0.662  0.50780    
## regionNorth           -0.92606    0.42047  -2.202  0.02763 *  
## regionToronto          0.66895    0.46926   1.426  0.15400    
## regionWest            -0.44010    0.27456  -1.603  0.10895    
## antipsychotic_percent  0.11455    0.10231   1.120  0.26287    
## pain_percent          -0.13978    0.09573  -1.460  0.14426    
## total_inspections     -4.47687    2.39477  -1.869  0.06156 .  
## X5y_inspections        2.94013    2.74932   1.069  0.28489    
## X2y_inspections       -1.15446    1.06732  -1.082  0.27941    
## total_complaints       1.09651    0.51258   2.139  0.03242 *  
## X5y_critical           1.12913    0.89243   1.265  0.20579    
## X2y_critical          -1.03111    0.69077  -1.493  0.13552    
## total_noncomplaints    2.52636    1.52126   1.661  0.09677 .  
## X5y_noncomplaints     -3.82543    1.99178  -1.921  0.05478 .  
## X2y_noncomplaints      1.12171    0.67575   1.660  0.09692 .  
## X5y_withOrders         0.52769    0.20441   2.582  0.00984 ** 
## X5y_cc                -0.77946    1.04659  -0.745  0.45641    
## X2y_cc                 1.35645    0.88432   1.534  0.12506    
## X2y_ccw               -0.71339    0.58634  -1.217  0.22373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 699.44  on 590  degrees of freedom
## AIC: 749.44
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_cc (Wald p = 0.46 in the summary above),
# then refit on the remaining terms.
fit <- update(fit, . ~ . - X5y_cc)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     accreditation + region + antipsychotic_percent + pain_percent + 
##     total_inspections + X5y_inspections + X2y_inspections + total_complaints + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + X5y_withOrders + X2y_cc + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0194  -0.9314  -0.4577   0.9705   2.1533  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.36640    2.20105   1.075   0.2823    
## home_typeMunicipal    -0.52079    0.28438  -1.831   0.0671 .  
## home_typeNon-Profit    0.05276    0.24394   0.216   0.8288    
## number_beds            0.23033    0.04772   4.826 1.39e-06 ***
## family_councilYes      0.15988    0.26053   0.614   0.5394    
## accreditationYes      -0.25488    0.27202  -0.937   0.3488    
## regionEast            -0.19731    0.31702  -0.622   0.5337    
## regionNorth           -0.91479    0.41974  -2.179   0.0293 *  
## regionToronto          0.65225    0.46847   1.392   0.1638    
## regionWest            -0.44717    0.27400  -1.632   0.1027    
## antipsychotic_percent  0.11266    0.10223   1.102   0.2705    
## pain_percent          -0.13938    0.09582  -1.455   0.1458    
## total_inspections     -3.70803    2.14379  -1.730   0.0837 .  
## X5y_inspections        1.02778    0.96551   1.064   0.2871    
## X2y_inspections       -0.79182    0.95036  -0.833   0.4047    
## total_complaints       0.91716    0.44790   2.048   0.0406 *  
## X5y_critical           0.54386    0.41390   1.314   0.1888    
## X2y_critical          -0.82836    0.63367  -1.307   0.1911    
## total_noncomplaints    2.09217    1.39815   1.496   0.1346    
## X5y_noncomplaints     -2.57157    1.04837  -2.453   0.0142 *  
## X2y_noncomplaints      0.95481    0.63643   1.500   0.1335    
## X5y_withOrders         0.52460    0.20415   2.570   0.0102 *  
## X2y_cc                 1.06663    0.79609   1.340   0.1803    
## X2y_ccw               -0.69675    0.58615  -1.189   0.2346    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 699.99  on 591  degrees of freedom
## AIC: 747.99
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_inspections (Wald p = 0.40 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - X2y_inspections)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     accreditation + region + antipsychotic_percent + pain_percent + 
##     total_inspections + X5y_inspections + total_complaints + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + X5y_withOrders + X2y_cc + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0210  -0.9361  -0.4460   0.9653   2.1777  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.91152    2.11370   1.377  0.16837    
## home_typeMunicipal    -0.50632    0.28305  -1.789  0.07365 .  
## home_typeNon-Profit    0.07548    0.24215   0.312  0.75525    
## number_beds            0.22687    0.04736   4.791 1.66e-06 ***
## family_councilYes      0.16354    0.26036   0.628  0.52992    
## accreditationYes      -0.25708    0.27212  -0.945  0.34480    
## regionEast            -0.18226    0.31628  -0.576  0.56445    
## regionNorth           -0.91243    0.41784  -2.184  0.02899 *  
## regionToronto          0.69147    0.46902   1.474  0.14041    
## regionWest            -0.44141    0.27375  -1.612  0.10687    
## antipsychotic_percent  0.11375    0.10253   1.109  0.26726    
## pain_percent          -0.13891    0.09602  -1.447  0.14797    
## total_inspections     -4.21445    2.06271  -2.043  0.04104 *  
## X5y_inspections        0.92425    0.95695   0.966  0.33413    
## total_complaints       1.02301    0.43169   2.370  0.01780 *  
## X5y_critical           0.61442    0.40544   1.515  0.12966    
## X2y_critical          -0.58086    0.55575  -1.045  0.29593    
## total_noncomplaints    2.38843    1.35409   1.764  0.07776 .  
## X5y_noncomplaints     -2.60186    1.04965  -2.479  0.01318 *  
## X2y_noncomplaints      0.59879    0.46588   1.285  0.19869    
## X5y_withOrders         0.55289    0.20127   2.747  0.00601 ** 
## X2y_cc                 0.70909    0.66439   1.067  0.28584    
## X2y_ccw               -0.78258    0.57518  -1.361  0.17364    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 700.68  on 592  degrees of freedom
## AIC: 746.68
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop accreditation (Wald p = 0.34 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - accreditation)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + antipsychotic_percent + pain_percent + total_inspections + 
##     X5y_inspections + total_complaints + X5y_critical + X2y_critical + 
##     total_noncomplaints + X5y_noncomplaints + X2y_noncomplaints + 
##     X5y_withOrders + X2y_cc + X2y_ccw, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0428  -0.9386  -0.4578   0.9689   2.1399  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.89085    2.11416   1.367  0.17151    
## home_typeMunicipal    -0.44474    0.27573  -1.613  0.10675    
## home_typeNon-Profit    0.14486    0.23062   0.628  0.52992    
## number_beds            0.22090    0.04684   4.716 2.41e-06 ***
## family_councilYes      0.13525    0.25756   0.525  0.59950    
## regionEast            -0.18854    0.31584  -0.597  0.55053    
## regionNorth           -0.92557    0.41761  -2.216  0.02667 *  
## regionToronto          0.68540    0.46935   1.460  0.14420    
## regionWest            -0.41931    0.27233  -1.540  0.12363    
## antipsychotic_percent  0.11407    0.10216   1.117  0.26417    
## pain_percent          -0.13908    0.09579  -1.452  0.14651    
## total_inspections     -4.35228    2.05768  -2.115  0.03442 *  
## X5y_inspections        0.94249    0.95518   0.987  0.32378    
## total_complaints       1.04487    0.43090   2.425  0.01531 *  
## X5y_critical           0.63273    0.40446   1.564  0.11773    
## X2y_critical          -0.59673    0.55544  -1.074  0.28267    
## total_noncomplaints    2.47423    1.35108   1.831  0.06706 .  
## X5y_noncomplaints     -2.62914    1.04882  -2.507  0.01218 *  
## X2y_noncomplaints      0.61488    0.46571   1.320  0.18673    
## X5y_withOrders         0.55432    0.20068   2.762  0.00574 ** 
## X2y_cc                 0.69907    0.66311   1.054  0.29177    
## X2y_ccw               -0.78554    0.57386  -1.369  0.17104    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 701.57  on 593  degrees of freedom
## AIC: 745.57
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_inspections (Wald p = 0.32 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - X5y_inspections)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + antipsychotic_percent + pain_percent + total_inspections + 
##     total_complaints + X5y_critical + X2y_critical + total_noncomplaints + 
##     X5y_noncomplaints + X2y_noncomplaints + X5y_withOrders + 
##     X2y_cc + X2y_ccw, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9985  -0.9303  -0.4649   0.9575   2.1177  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.99094    2.10654   1.420  0.15566    
## home_typeMunicipal    -0.42710    0.27481  -1.554  0.12014    
## home_typeNon-Profit    0.15064    0.23037   0.654  0.51317    
## number_beds            0.21747    0.04663   4.664  3.1e-06 ***
## family_councilYes      0.12962    0.25704   0.504  0.61407    
## regionEast            -0.16847    0.31506  -0.535  0.59285    
## regionNorth           -0.91004    0.41719  -2.181  0.02916 *  
## regionToronto          0.63731    0.46520   1.370  0.17070    
## regionWest            -0.39425    0.27099  -1.455  0.14572    
## antipsychotic_percent  0.11458    0.10199   1.123  0.26127    
## pain_percent          -0.13181    0.09541  -1.381  0.16714    
## total_inspections     -3.90122    2.00008  -1.951  0.05111 .  
## total_complaints       1.04607    0.42982   2.434  0.01494 *  
## X5y_critical           0.70282    0.39941   1.760  0.07847 .  
## X2y_critical          -0.77999    0.52457  -1.487  0.13704    
## total_noncomplaints    2.14551    1.30506   1.644  0.10018    
## X5y_noncomplaints     -1.93897    0.78059  -2.484  0.01299 *  
## X2y_noncomplaints      0.57237    0.46430   1.233  0.21766    
## X5y_withOrders         0.57058    0.19977   2.856  0.00429 ** 
## X2y_cc                 0.91083    0.62842   1.449  0.14722    
## X2y_ccw               -0.78355    0.57421  -1.365  0.17239    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 702.55  on 594  degrees of freedom
## AIC: 744.55
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop antipsychotic_percent (Wald p = 0.26 in the
# summary above), then refit on the remaining terms.
fit <- update(fit, . ~ . - antipsychotic_percent)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + pain_percent + total_inspections + total_complaints + 
##     X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + X5y_withOrders + X2y_cc + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9797  -0.9334  -0.4462   0.9671   2.0869  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          3.50303    2.05956   1.701  0.08897 .  
## home_typeMunicipal  -0.43040    0.27433  -1.569  0.11666    
## home_typeNon-Profit  0.13309    0.22975   0.579  0.56240    
## number_beds          0.21918    0.04657   4.707 2.52e-06 ***
## family_councilYes    0.13572    0.25630   0.530  0.59643    
## regionEast          -0.16780    0.31541  -0.532  0.59473    
## regionNorth         -0.89599    0.41596  -2.154  0.03124 *  
## regionToronto        0.64095    0.46432   1.380  0.16746    
## regionWest          -0.37520    0.27017  -1.389  0.16491    
## pain_percent        -0.12152    0.09488  -1.281  0.20030    
## total_inspections   -3.95774    2.00034  -1.979  0.04787 *  
## total_complaints     1.05673    0.43007   2.457  0.01401 *  
## X5y_critical         0.68811    0.39908   1.724  0.08467 .  
## X2y_critical        -0.82278    0.52237  -1.575  0.11524    
## total_noncomplaints  2.12001    1.30545   1.624  0.10438    
## X5y_noncomplaints   -1.89222    0.78040  -2.425  0.01532 *  
## X2y_noncomplaints    0.62045    0.46180   1.344  0.17909    
## X5y_withOrders       0.57288    0.19985   2.867  0.00415 ** 
## X2y_cc               0.95309    0.62560   1.523  0.12764    
## X2y_ccw             -0.80860    0.57235  -1.413  0.15772    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 703.82  on 595  degrees of freedom
## AIC: 743.82
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop pain_percent (Wald p = 0.20 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - pain_percent)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_inspections + total_complaints + X5y_critical + 
##     X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X2y_noncomplaints + X5y_withOrders + X2y_cc + X2y_ccw, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0027  -0.9297  -0.4657   0.9559   2.0742  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          3.09072    2.03063   1.522  0.12800    
## home_typeMunicipal  -0.45334    0.27324  -1.659  0.09709 .  
## home_typeNon-Profit  0.12486    0.22930   0.545  0.58607    
## number_beds          0.22697    0.04612   4.921 8.59e-07 ***
## family_councilYes    0.10801    0.25521   0.423  0.67212    
## regionEast          -0.20528    0.31306  -0.656  0.51200    
## regionNorth         -1.00268    0.40810  -2.457  0.01401 *  
## regionToronto        0.65151    0.46432   1.403  0.16057    
## regionWest          -0.42788    0.26659  -1.605  0.10848    
## total_inspections   -3.79647    1.99121  -1.907  0.05657 .  
## total_complaints     1.03657    0.42953   2.413  0.01581 *  
## X5y_critical         0.66998    0.39873   1.680  0.09290 .  
## X2y_critical        -0.81205    0.52060  -1.560  0.11880    
## total_noncomplaints  2.04161    1.29950   1.571  0.11617    
## X5y_noncomplaints   -1.93381    0.77803  -2.486  0.01294 *  
## X2y_noncomplaints    0.64394    0.45948   1.401  0.16108    
## X5y_withOrders       0.57761    0.19985   2.890  0.00385 ** 
## X2y_cc               0.98048    0.62390   1.572  0.11606    
## X2y_ccw             -0.85041    0.57074  -1.490  0.13622    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 705.47  on 596  degrees of freedom
## AIC: 743.47
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_noncomplaints (Wald p = 0.16 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - X2y_noncomplaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_inspections + total_complaints + X5y_critical + 
##     X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X5y_withOrders + X2y_cc + X2y_ccw, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0273  -0.9286  -0.4827   0.9758   2.1461  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          2.82809    2.01300   1.405   0.1600    
## home_typeMunicipal  -0.43404    0.27212  -1.595   0.1107    
## home_typeNon-Profit  0.13610    0.22849   0.596   0.5514    
## number_beds          0.22667    0.04599   4.929 8.28e-07 ***
## family_councilYes    0.10265    0.25497   0.403   0.6872    
## regionEast          -0.23724    0.31170  -0.761   0.4466    
## regionNorth         -0.94425    0.40530  -2.330   0.0198 *  
## regionToronto        0.62598    0.46420   1.349   0.1775    
## regionWest          -0.42757    0.26584  -1.608   0.1078    
## total_inspections   -4.00644    1.98624  -2.017   0.0437 *  
## total_complaints     1.06292    0.42950   2.475   0.0133 *  
## X5y_critical         0.52248    0.38282   1.365   0.1723    
## X2y_critical        -0.33414    0.39207  -0.852   0.3941    
## total_noncomplaints  2.23947    1.29320   1.732   0.0833 .  
## X5y_noncomplaints   -1.46195    0.69526  -2.103   0.0355 *  
## X5y_withOrders       0.52105    0.19486   2.674   0.0075 ** 
## X2y_cc               0.52085    0.52998   0.983   0.3257    
## X2y_ccw             -0.36546    0.45281  -0.807   0.4196    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 707.46  on 597  degrees of freedom
## AIC: 743.46
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_ccw (Wald p = 0.42 in the summary above),
# then refit on the remaining terms.
fit <- update(fit, . ~ . - X2y_ccw)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_inspections + total_complaints + X5y_critical + 
##     X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X5y_withOrders + X2y_cc, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0287  -0.9291  -0.4727   0.9705   2.1238  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          2.87118    2.00789   1.430   0.1527    
## home_typeMunicipal  -0.44120    0.27155  -1.625   0.1042    
## home_typeNon-Profit  0.12881    0.22844   0.564   0.5728    
## number_beds          0.22809    0.04596   4.963 6.96e-07 ***
## family_councilYes    0.10866    0.25437   0.427   0.6693    
## regionEast          -0.21321    0.31018  -0.687   0.4919    
## regionNorth         -0.91458    0.40280  -2.271   0.0232 *  
## regionToronto        0.60258    0.46302   1.301   0.1931    
## regionWest          -0.41806    0.26532  -1.576   0.1151    
## total_inspections   -4.01902    1.98189  -2.028   0.0426 *  
## total_complaints     1.06613    0.42814   2.490   0.0128 *  
## X5y_critical         0.56047    0.37967   1.476   0.1399    
## X2y_critical        -0.37373    0.38919  -0.960   0.3369    
## total_noncomplaints  2.24408    1.29159   1.737   0.0823 .  
## X5y_noncomplaints   -1.48109    0.69475  -2.132   0.0330 *  
## X5y_withOrders       0.43435    0.16217   2.678   0.0074 ** 
## X2y_cc               0.16527    0.29230   0.565   0.5718    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 708.11  on 598  degrees of freedom
## AIC: 742.11
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_cc (Wald p = 0.57 in the summary above),
# then refit on the remaining terms.
fit <- update(fit, . ~ . - X2y_cc)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_inspections + total_complaints + X5y_critical + 
##     X2y_critical + total_noncomplaints + X5y_noncomplaints + 
##     X5y_withOrders, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0514  -0.9323  -0.4752   0.9748   2.1478  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          2.95484    2.00303   1.475  0.14016    
## home_typeMunicipal  -0.44372    0.27165  -1.633  0.10237    
## home_typeNon-Profit  0.13273    0.22808   0.582  0.56060    
## number_beds          0.23267    0.04527   5.140 2.75e-07 ***
## family_councilYes    0.11070    0.25446   0.435  0.66354    
## regionEast          -0.18458    0.30571  -0.604  0.54600    
## regionNorth         -0.90698    0.40225  -2.255  0.02415 *  
## regionToronto        0.58071    0.46084   1.260  0.20763    
## regionWest          -0.41830    0.26547  -1.576  0.11509    
## total_inspections   -4.01728    1.98229  -2.027  0.04270 *  
## total_complaints     1.10315    0.42343   2.605  0.00918 ** 
## X5y_critical         0.57081    0.37879   1.507  0.13183    
## X2y_critical        -0.23330    0.29942  -0.779  0.43588    
## total_noncomplaints  2.19030    1.28801   1.701  0.08903 .  
## X5y_noncomplaints   -1.48421    0.69472  -2.136  0.03265 *  
## X5y_withOrders       0.44466    0.16127   2.757  0.00583 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 708.43  on 599  degrees of freedom
## AIC: 740.43
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_critical (Wald p = 0.44 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - X2y_critical)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_inspections + total_complaints + X5y_critical + 
##     total_noncomplaints + X5y_noncomplaints + X5y_withOrders, 
##     family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0631  -0.9339  -0.4628   0.9761   2.1563  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          2.87600    1.99363   1.443  0.14914    
## home_typeMunicipal  -0.45309    0.27132  -1.670  0.09493 .  
## home_typeNon-Profit  0.12590    0.22767   0.553  0.58026    
## number_beds          0.23159    0.04523   5.121 3.04e-07 ***
## family_councilYes    0.11552    0.25448   0.454  0.64987    
## regionEast          -0.12607    0.29631  -0.425  0.67051    
## regionNorth         -0.87346    0.40002  -2.184  0.02900 *  
## regionToronto        0.59382    0.46095   1.288  0.19767    
## regionWest          -0.39703    0.26401  -1.504  0.13263    
## total_inspections   -4.14821    1.97248  -2.103  0.03546 *  
## total_complaints     1.13624    0.42088   2.700  0.00694 ** 
## X5y_critical         0.39421    0.30216   1.305  0.19201    
## total_noncomplaints  2.28821    1.28094   1.786  0.07404 .  
## X5y_noncomplaints   -1.44184    0.69191  -2.084  0.03717 *  
## X5y_withOrders       0.43556    0.16056   2.713  0.00667 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 709.04  on 600  degrees of freedom
## AIC: 739.04
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_critical (Wald p = 0.19 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - X5y_critical)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_inspections + total_complaints + total_noncomplaints + 
##     X5y_noncomplaints + X5y_withOrders, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0427  -0.9397  -0.4745   0.9736   2.1326  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          2.17270    1.91988   1.132  0.25777    
## home_typeMunicipal  -0.44868    0.27055  -1.658  0.09723 .  
## home_typeNon-Profit  0.11171    0.22680   0.493  0.62232    
## number_beds          0.24547    0.04404   5.574 2.49e-08 ***
## family_councilYes    0.12014    0.25414   0.473  0.63641    
## regionEast          -0.10225    0.29512  -0.346  0.72899    
## regionNorth         -0.87908    0.39953  -2.200  0.02779 *  
## regionToronto        0.59456    0.45960   1.294  0.19579    
## regionWest          -0.38521    0.26329  -1.463  0.14345    
## total_inspections   -4.44497    1.95868  -2.269  0.02325 *  
## total_complaints     1.20621    0.41784   2.887  0.00389 ** 
## total_noncomplaints  2.55459    1.26365   2.022  0.04322 *  
## X5y_noncomplaints   -0.75877    0.45154  -1.680  0.09288 .  
## X5y_withOrders       0.31796    0.13232   2.403  0.01626 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 710.76  on 601  degrees of freedom
## AIC: 738.76
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_noncomplaints (Wald p = 0.09 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - X5y_noncomplaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_inspections + total_complaints + total_noncomplaints + 
##     X5y_withOrders, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0095  -0.9504  -0.4767   0.9919   2.1132  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          1.88897    1.91095   0.988  0.32291    
## home_typeMunicipal  -0.44455    0.27019  -1.645  0.09991 .  
## home_typeNon-Profit  0.12092    0.22553   0.536  0.59186    
## number_beds          0.23716    0.04348   5.455 4.91e-08 ***
## family_councilYes    0.13028    0.25404   0.513  0.60807    
## regionEast          -0.22078    0.28461  -0.776  0.43791    
## regionNorth         -0.93325    0.39813  -2.344  0.01908 *  
## regionToronto        0.52876    0.45531   1.161  0.24551    
## regionWest          -0.37431    0.26198  -1.429  0.15306    
## total_inspections   -4.36075    1.95984  -2.225  0.02608 *  
## total_complaints     1.19345    0.41824   2.854  0.00432 ** 
## total_noncomplaints  2.04527    1.22706   1.667  0.09555 .  
## X5y_withOrders       0.21369    0.11596   1.843  0.06536 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 713.60  on 602  degrees of freedom
## AIC: 739.6
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop total_noncomplaints (Wald p = 0.10 in the
# summary above), then refit on the remaining terms.
fit <- update(fit, . ~ . - total_noncomplaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_inspections + total_complaints + X5y_withOrders, 
##     family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9451  -0.9550  -0.4645   0.9864   2.0832  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -0.66077    1.12918  -0.585  0.55843    
## home_typeMunicipal  -0.44994    0.26975  -1.668  0.09531 .  
## home_typeNon-Profit  0.15183    0.22511   0.674  0.50002    
## number_beds          0.23448    0.04330   5.416  6.1e-08 ***
## family_councilYes    0.09182    0.25283   0.363  0.71647    
## regionEast          -0.19619    0.28466  -0.689  0.49070    
## regionNorth         -0.88085    0.39757  -2.216  0.02672 *  
## regionToronto        0.57336    0.46356   1.237  0.21614    
## regionWest          -0.33647    0.26206  -1.284  0.19916    
## total_inspections   -1.22701    0.50821  -2.414  0.01576 *  
## total_complaints     0.58166    0.18416   3.158  0.00159 ** 
## X5y_withOrders       0.21870    0.11480   1.905  0.05677 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 716.44  on 603  degrees of freedom
## AIC: 740.44
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_withOrders (Wald p = 0.057 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - X5y_withOrders)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_inspections + total_complaints, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0188  -0.9600  -0.4620   0.9789   2.0712  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.21539    1.08491  -1.120   0.2626    
## home_typeMunicipal  -0.48239    0.26858  -1.796   0.0725 .  
## home_typeNon-Profit  0.19029    0.22352   0.851   0.3946    
## number_beds          0.23040    0.04295   5.364 8.13e-08 ***
## family_councilYes    0.11959    0.25154   0.475   0.6345    
## regionEast          -0.29927    0.27835  -1.075   0.2823    
## regionNorth         -0.79876    0.39239  -2.036   0.0418 *  
## regionToronto        0.47976    0.45974   1.044   0.2967    
## regionWest          -0.41542    0.25858  -1.607   0.1082    
## total_inspections   -0.89250    0.47219  -1.890   0.0587 .  
## total_complaints     0.54281    0.18160   2.989   0.0028 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 720.12  on 604  degrees of freedom
## AIC: 742.12
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop total_inspections (Wald p = 0.059 in the summary
# above), then refit on the remaining terms.
fit <- update(fit, . ~ . - total_inspections)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council + 
##     region + total_complaints, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9936  -0.9639  -0.4442   0.9950   2.0009  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -3.07844    0.47856  -6.433 1.25e-10 ***
## home_typeMunicipal  -0.51366    0.26651  -1.927  0.05393 .  
## home_typeNon-Profit  0.20671    0.22315   0.926  0.35429    
## number_beds          0.22264    0.04264   5.221 1.78e-07 ***
## family_councilYes    0.11749    0.25168   0.467  0.64062    
## regionEast          -0.45305    0.26586  -1.704  0.08837 .  
## regionNorth         -0.99635    0.38084  -2.616  0.00889 ** 
## regionToronto        0.60953    0.45451   1.341  0.17989    
## regionWest          -0.51509    0.25338  -2.033  0.04206 *  
## total_complaints     0.24279    0.08404   2.889  0.00387 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 723.78  on 605  degrees of freedom
## AIC: 743.78
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop the home_type factor (level-wise Wald p = 0.054
# for Municipal and 0.35 for Non-Profit in the summary above), then refit.
# NOTE(review): family_council (p = 0.64 above) stays in the model while
# home_type is removed, and the residual deviance rises by about 6.1 on 2 df
# (723.78 -> 729.86) -- confirm this ordering is intended; a term-level test
# such as drop1(fit, test = "Chisq") would judge the 3-level factor as a whole.
fit <- update(fit, . ~ . - home_type)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ number_beds + family_council + region + 
##     total_complaints, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0172  -0.9768  -0.4551   1.0108   2.0083  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -2.85512    0.46241  -6.174 6.64e-10 ***
## number_beds        0.19801    0.03993   4.959 7.07e-07 ***
## family_councilYes  0.10140    0.25050   0.405  0.68561    
## regionEast        -0.50000    0.26405  -1.894  0.05828 .  
## regionNorth       -1.03173    0.37660  -2.740  0.00615 ** 
## regionToronto      0.67297    0.45206   1.489  0.13657    
## regionWest        -0.57752    0.25091  -2.302  0.02135 *  
## total_complaints   0.25978    0.08097   3.208  0.00134 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 729.86  on 607  degrees of freedom
## AIC: 745.86
## 
## Number of Fisher Scoring iterations: 4

As shown above, when all variables are included in the initial regression and backward selection is then used for variable selection, the significant variables are number of beds, total complaints, and region. Note that family_council is still in the final model even though it is not significant (p = 0.69).

# Calculate McFadden's pseudo R2: (ll.null - ll.proposed) / ll.null.
# deviance / -2 is used as the log-likelihood; this presumably relies on the
# response being binary (saturated log-likelihood of 0) -- confirm if the
# response type ever changes.
ll.null <- fit$null.deviance / -2
ll.proposed <- fit$deviance / -2
print((ll.null - ll.proposed) / ll.null)
## [1] 0.1423571
# Likelihood-ratio test of the fitted model against the intercept-only model,
# with one degree of freedom per non-intercept coefficient.
# lower.tail = FALSE computes the upper tail directly; the previous
# 1 - pchisq(...) form cancelled to exactly 0 because the p-value is far
# smaller than double precision can resolve next to 1.
pchisq(
  2 * (ll.proposed - ll.null),
  df = length(fit$coefficients) - 1,
  lower.tail = FALSE
) # p-value
## (output needs re-knitting: the p-value is tiny but non-zero, roughly on the
## order of 1e-23 given the deviances reported above; it previously printed 0)

Visualize the model

# Pair each observation's fitted outbreak probability with its observed
# outbreak status
predicted.data <- data.frame(
  probability.of.outbreak = fit$fitted.values,
  outbreak = data$outbreak
)

# Sort rows from the lowest to the highest fitted probability
predicted.data <- predicted.data[
  order(predicted.data$probability.of.outbreak, decreasing = FALSE),
]

# Add the rank of each row after sorting. seq_len() is used instead of
# 1:nrow() so that an empty data frame yields an empty rank rather than c(1, 0).
predicted.data$rank <- seq_len(nrow(predicted.data))

library(ggplot2)
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
# Plot the fitted probabilities in rank order, colouring each point by the
# observed outbreak status (colours are assigned in factor-level order).
ggplot(data = predicted.data, aes(x = rank, y = probability.of.outbreak)) +
  geom_point(aes(color = outbreak), alpha = 0.5, shape = 4, stroke = 1) +
  labs(
    x = "Index",
    y = "Predicted probability of an outbreak",
    title = "Outbreak Status Ordered By Predicted Probability of an Outbreak"
  ) +
  scale_color_manual(values = c("blue", "red"))

Repeat the logistic regression with selected variables (home type, number of beds, and the noncomplaints, complaints and withOrders variable groups)

# Build the modelling frame: the response, home type, bed count, and the
# columns selected through noncomplaints, complaints and withOrders
# (all three are defined outside this section).
data_selected <- subset(
  df,
  select = c(
    "outbreak", "home_type", "number_beds",
    noncomplaints, complaints, withOrders
  )
)
skim(data_selected)
Data summary
Name data_selected
Number of rows 615
Number of columns 12
_______________________
Column type frequency:
factor 2
numeric 10
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
outbreak 0 1 FALSE 2 no: 323, yes: 292
home_type 0 1 FALSE 3 For: 351, Non: 164, Mun: 100

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
number_beds 0 1 127.27 73.83 12 69 120 160 543 ▇▇▁▁▁
total_noncomplaints 0 1 22.98 12.09 5 15 20 28 90 ▇▃▁▁▁
X5y_noncomplaints 0 1 13.23 7.11 4 9 11 15 68 ▇▂▁▁▁
X2y_noncomplaints 0 1 6.00 3.64 0 4 5 8 40 ▇▂▁▁▁
total_complaints 0 1 14.70 11.64 0 7 12 20 94 ▇▂▁▁▁
X5y_complaints 0 1 6.60 5.93 0 2 5 9 46 ▇▂▁▁▁
X2y_complaints 0 1 3.20 2.86 0 1 3 5 20 ▇▂▁▁▁
total_withOrders 0 1 6.88 6.17 0 3 5 9 43 ▇▂▁▁▁
X5y_withOrders 0 1 4.41 4.35 0 2 3 6 34 ▇▂▁▁▁
X2y_withOrders 0 1 1.85 2.20 0 0 1 3 16 ▇▁▁▁▁
# Fit the full logistic regression: outbreak against every other column of
# data_selected.
fit <- glm(outbreak ~ ., family = binomial, data = data_selected)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ ., family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4042  -0.9564  -0.6169   1.0565   1.9789  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.552933   0.267264  -5.810 6.23e-09 ***
## home_typeMunicipal  -0.551167   0.270037  -2.041   0.0412 *  
## home_typeNon-Profit  0.155759   0.216670   0.719   0.4722    
## number_beds          0.011545   0.001887   6.119 9.42e-10 ***
## total_noncomplaints -0.018206   0.019458  -0.936   0.3495    
## X5y_noncomplaints   -0.036013   0.043604  -0.826   0.4089    
## X2y_noncomplaints    0.021829   0.062826   0.347   0.7282    
## total_complaints     0.036504   0.020229   1.805   0.0711 .  
## X5y_complaints       0.015913   0.047024   0.338   0.7351    
## X2y_complaints       0.019416   0.062150   0.312   0.7547    
## total_withOrders    -0.027825   0.039733  -0.700   0.4837    
## X5y_withOrders       0.083603   0.075386   1.109   0.2674    
## X2y_withOrders      -0.024768   0.095839  -0.258   0.7961    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 730.55  on 602  degrees of freedom
## AIC: 756.55
## 
## Number of Fisher Scoring iterations: 4

Backwards Selection

# Backward elimination: drop X2y_withOrders, the term with the largest
# p-value (0.80) in the summary above, and refit.
fit <- update(fit, . ~ . - X2y_withOrders)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints + 
##     X5y_noncomplaints + X2y_noncomplaints + total_complaints + 
##     X5y_complaints + X2y_complaints + total_withOrders + X5y_withOrders, 
##     family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4122  -0.9625  -0.6219   1.0537   1.9714  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.555533   0.267159  -5.822 5.80e-09 ***
## home_typeMunicipal  -0.551354   0.270000  -2.042   0.0411 *  
## home_typeNon-Profit  0.154674   0.216629   0.714   0.4752    
## number_beds          0.011544   0.001886   6.121 9.32e-10 ***
## total_noncomplaints -0.018217   0.019449  -0.937   0.3489    
## X5y_noncomplaints   -0.030634   0.038328  -0.799   0.4241    
## X2y_noncomplaints    0.011411   0.048182   0.237   0.8128    
## total_complaints     0.036240   0.020193   1.795   0.0727 .  
## X5y_complaints       0.015795   0.047012   0.336   0.7369    
## X2y_complaints       0.018421   0.061941   0.297   0.7662    
## total_withOrders    -0.026402   0.039319  -0.671   0.5019    
## X5y_withOrders       0.071508   0.059097   1.210   0.2263    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 730.61  on 603  degrees of freedom
## AIC: 754.61
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_complaints (p = 0.77 in the summary above)
# and refit.
# NOTE(review): X2y_noncomplaints had the larger p-value (0.81) at this step.
fit <- update(fit, . ~ . - X2y_complaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints + 
##     X5y_noncomplaints + X2y_noncomplaints + total_complaints + 
##     X5y_complaints + total_withOrders + X5y_withOrders, family = binomial, 
##     data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4176  -0.9554  -0.6266   1.0543   1.9748  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.54653    0.26542  -5.827 5.65e-09 ***
## home_typeMunicipal  -0.55716    0.26942  -2.068   0.0386 *  
## home_typeNon-Profit  0.15254    0.21643   0.705   0.4809    
## number_beds          0.01166    0.00185   6.299 3.00e-10 ***
## total_noncomplaints -0.01909    0.01925  -0.992   0.3213    
## X5y_noncomplaints   -0.03146    0.03821  -0.823   0.4103    
## X2y_noncomplaints    0.01450    0.04691   0.309   0.7572    
## total_complaints     0.03569    0.02011   1.775   0.0759 .  
## X5y_complaints       0.02435    0.03737   0.651   0.5148    
## total_withOrders    -0.02549    0.03918  -0.651   0.5153    
## X5y_withOrders       0.07052    0.05900   1.195   0.2320    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 730.70  on 604  degrees of freedom
## AIC: 752.7
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_noncomplaints, the term with the largest
# p-value (0.76) in the summary above, and refit.
fit <- update(fit, . ~ . - X2y_noncomplaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints + 
##     X5y_noncomplaints + total_complaints + X5y_complaints + total_withOrders + 
##     X5y_withOrders, family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4284  -0.9502  -0.6204   1.0552   1.9600  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.550158   0.265675  -5.835 5.39e-09 ***
## home_typeMunicipal  -0.551389   0.268796  -2.051   0.0402 *  
## home_typeNon-Profit  0.156706   0.215950   0.726   0.4680    
## number_beds          0.011715   0.001842   6.359 2.04e-10 ***
## total_noncomplaints -0.018734   0.019214  -0.975   0.3295    
## X5y_noncomplaints   -0.025634   0.033244  -0.771   0.4407    
## total_complaints     0.036176   0.020051   1.804   0.0712 .  
## X5y_complaints       0.022213   0.036715   0.605   0.5452    
## total_withOrders    -0.027152   0.038859  -0.699   0.4847    
## X5y_withOrders       0.073765   0.058112   1.269   0.2043    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 730.80  on 605  degrees of freedom
## AIC: 750.8
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop total_withOrders (p = 0.48 in the summary above)
# and refit.
# NOTE(review): X5y_complaints had a larger p-value (0.55) at this step.
fit <- update(fit, . ~ . - total_withOrders)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints + 
##     X5y_noncomplaints + total_complaints + X5y_complaints + X5y_withOrders, 
##     family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5331  -0.9615  -0.6241   1.0680   1.9709  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.546460   0.266125  -5.811 6.21e-09 ***
## home_typeMunicipal  -0.546342   0.268538  -2.035   0.0419 *  
## home_typeNon-Profit  0.145870   0.215560   0.677   0.4986    
## number_beds          0.011721   0.001843   6.358 2.04e-10 ***
## total_noncomplaints -0.023973   0.017653  -1.358   0.1745    
## X5y_noncomplaints   -0.019374   0.032015  -0.605   0.5451    
## total_complaints     0.033372   0.019554   1.707   0.0879 .  
## X5y_complaints       0.028112   0.035697   0.788   0.4310    
## X5y_withOrders       0.040192   0.032489   1.237   0.2160    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 731.29  on 606  degrees of freedom
## AIC: 749.29
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_noncomplaints, the term with the largest
# p-value (0.55) in the summary above, and refit.
fit <- update(fit, . ~ . - X5y_noncomplaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints + 
##     total_complaints + X5y_complaints + X5y_withOrders, family = binomial, 
##     data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5263  -0.9616  -0.6166   1.0543   1.9948  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.58272    0.25879  -6.116 9.61e-10 ***
## home_typeMunicipal  -0.54990    0.26865  -2.047  0.04067 *  
## home_typeNon-Profit  0.14367    0.21535   0.667  0.50467    
## number_beds          0.01168    0.00184   6.348 2.17e-10 ***
## total_noncomplaints -0.03167    0.01227  -2.581  0.00986 ** 
## total_complaints     0.03768    0.01830   2.059  0.03948 *  
## X5y_complaints       0.01949    0.03270   0.596  0.55112    
## X5y_withOrders       0.03018    0.02789   1.082  0.27932    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 731.66  on 607  degrees of freedom
## AIC: 747.66
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_complaints, the term with the largest
# p-value (0.55) in the summary above, and refit.
fit <- update(fit, . ~ . - X5y_complaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints + 
##     total_complaints + X5y_withOrders, family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5648  -0.9623  -0.6222   1.0539   1.9893  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.600855   0.256928  -6.231 4.64e-10 ***
## home_typeMunicipal  -0.541607   0.267981  -2.021  0.04327 *  
## home_typeNon-Profit  0.146247   0.215209   0.680  0.49678    
## number_beds          0.011743   0.001836   6.397 1.58e-10 ***
## total_noncomplaints -0.030982   0.012173  -2.545  0.01093 *  
## total_complaints     0.045207   0.013412   3.371  0.00075 ***
## X5y_withOrders       0.032378   0.027472   1.179  0.23856    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 732.01  on 608  degrees of freedom
## AIC: 746.01
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_withOrders (p = 0.24 in the summary above),
# the last numeric term above the 0.05 level, and refit.
fit <- update(fit, . ~ . - X5y_withOrders)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints + 
##     total_complaints, family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5854  -0.9661  -0.6290   1.0365   1.9760  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.60829    0.25716  -6.254 4.00e-10 ***
## home_typeMunicipal  -0.56603    0.26722  -2.118  0.03416 *  
## home_typeNon-Profit  0.16650    0.21424   0.777  0.43707    
## number_beds          0.01174    0.00183   6.415 1.41e-10 ***
## total_noncomplaints -0.02500    0.01113  -2.246  0.02468 *  
## total_complaints     0.04584    0.01344   3.410  0.00065 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 733.42  on 609  degrees of freedom
## AIC: 745.42
## 
## Number of Fisher Scoring iterations: 4

Repeat the logistic regression with selected variables (home type, number of beds, and the all_inspections, complaints and withOrders variable groups)

# Build the modelling frame: the response, home type, bed count, and the
# columns selected through all_inspections, complaints and withOrders
# (all three are defined outside this section).
data_selected <- subset(
  df,
  select = c(
    "outbreak", "home_type", "number_beds",
    all_inspections, complaints, withOrders
  )
)
skim(data_selected)
Data summary
Name data_selected
Number of rows 615
Number of columns 12
_______________________
Column type frequency:
factor 2
numeric 10
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
outbreak 0 1 FALSE 2 no: 323, yes: 292
home_type 0 1 FALSE 3 For: 351, Non: 164, Mun: 100

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
number_beds 0 1 127.27 73.83 12 69 120 160 543 ▇▇▁▁▁
total_inspections 0 1 37.68 21.95 9 22 32 47 172 ▇▃▁▁▁
X5y_inspections 0 1 19.82 11.95 5 12 17 24 86 ▇▃▁▁▁
X2y_inspections 0 1 9.20 5.59 1 5 8 12 44 ▇▅▁▁▁
total_complaints 0 1 14.70 11.64 0 7 12 20 94 ▇▂▁▁▁
X5y_complaints 0 1 6.60 5.93 0 2 5 9 46 ▇▂▁▁▁
X2y_complaints 0 1 3.20 2.86 0 1 3 5 20 ▇▂▁▁▁
total_withOrders 0 1 6.88 6.17 0 3 5 9 43 ▇▂▁▁▁
X5y_withOrders 0 1 4.41 4.35 0 2 3 6 34 ▇▂▁▁▁
X2y_withOrders 0 1 1.85 2.20 0 0 1 3 16 ▇▁▁▁▁
# Fit the full logistic regression: outbreak against every other column of
# data_selected.
fit <- glm(outbreak ~ ., family = binomial, data = data_selected)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ ., family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4042  -0.9564  -0.6169   1.0565   1.9789  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.552933   0.267264  -5.810 6.23e-09 ***
## home_typeMunicipal  -0.551167   0.270037  -2.041   0.0412 *  
## home_typeNon-Profit  0.155759   0.216670   0.719   0.4722    
## number_beds          0.011545   0.001887   6.119 9.42e-10 ***
## total_inspections   -0.018206   0.019458  -0.936   0.3495    
## X5y_inspections     -0.036013   0.043604  -0.826   0.4089    
## X2y_inspections      0.021829   0.062826   0.347   0.7282    
## total_complaints     0.054710   0.032437   1.687   0.0917 .  
## X5y_complaints       0.051926   0.073004   0.711   0.4769    
## X2y_complaints      -0.002413   0.093847  -0.026   0.9795    
## total_withOrders    -0.027825   0.039733  -0.700   0.4837    
## X5y_withOrders       0.083603   0.075386   1.109   0.2674    
## X2y_withOrders      -0.024768   0.095839  -0.258   0.7961    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 730.55  on 602  degrees of freedom
## AIC: 756.55
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_complaints, the term with the largest
# p-value (0.98) in the summary above, and refit.
fit <- update(fit, . ~ . - X2y_complaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections + 
##     X5y_inspections + X2y_inspections + total_complaints + X5y_complaints + 
##     total_withOrders + X5y_withOrders + X2y_withOrders, family = binomial, 
##     data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4046  -0.9577  -0.6165   1.0573   1.9777  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.553778   0.265262  -5.858 4.70e-09 ***
## home_typeMunicipal  -0.550539   0.268922  -2.047   0.0406 *  
## home_typeNon-Profit  0.156037   0.216405   0.721   0.4709    
## number_beds          0.011540   0.001876   6.151 7.69e-10 ***
## total_inspections   -0.018133   0.019246  -0.942   0.3461    
## X5y_inspections     -0.035507   0.038904  -0.913   0.3614    
## X2y_inspections      0.020611   0.041268   0.499   0.6175    
## total_complaints     0.054684   0.032418   1.687   0.0916 .  
## X5y_complaints       0.050758   0.057118   0.889   0.3742    
## total_withOrders    -0.027902   0.039623  -0.704   0.4813    
## X5y_withOrders       0.083342   0.074707   1.116   0.2646    
## X2y_withOrders      -0.023812   0.088341  -0.270   0.7875    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 730.55  on 603  degrees of freedom
## AIC: 754.55
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_withOrders, the term with the largest
# p-value (0.79) in the summary above, and refit.
fit <- update(fit, . ~ . - X2y_withOrders)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections + 
##     X5y_inspections + X2y_inspections + total_complaints + X5y_complaints + 
##     total_withOrders + X5y_withOrders, family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4119  -0.9627  -0.6239   1.0494   1.9743  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.552962   0.265202  -5.856 4.75e-09 ***
## home_typeMunicipal  -0.553511   0.268716  -2.060   0.0394 *  
## home_typeNon-Profit  0.153588   0.216207   0.710   0.4775    
## number_beds          0.011561   0.001874   6.169 6.88e-10 ***
## total_inspections   -0.018469   0.019207  -0.962   0.3363    
## X5y_inspections     -0.031660   0.036187  -0.875   0.3816    
## X2y_inspections      0.014203   0.033734   0.421   0.6737    
## total_complaints     0.054512   0.032398   1.683   0.0925 .  
## X5y_complaints       0.049712   0.056960   0.873   0.3828    
## total_withOrders    -0.025952   0.038914  -0.667   0.5048    
## X5y_withOrders       0.070805   0.058454   1.211   0.2258    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 730.62  on 604  degrees of freedom
## AIC: 752.62
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_inspections, the term with the largest
# p-value (0.67) in the summary above, and refit.
fit <- update(fit, . ~ . - X2y_inspections)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections + 
##     X5y_inspections + total_complaints + X5y_complaints + total_withOrders + 
##     X5y_withOrders, family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4284  -0.9502  -0.6204   1.0552   1.9600  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.550158   0.265675  -5.835 5.39e-09 ***
## home_typeMunicipal  -0.551389   0.268796  -2.051   0.0402 *  
## home_typeNon-Profit  0.156706   0.215950   0.726   0.4680    
## number_beds          0.011715   0.001842   6.359 2.04e-10 ***
## total_inspections   -0.018734   0.019214  -0.975   0.3295    
## X5y_inspections     -0.025634   0.033244  -0.771   0.4407    
## total_complaints     0.054910   0.032392   1.695   0.0900 .  
## X5y_complaints       0.047847   0.056775   0.843   0.3994    
## total_withOrders    -0.027152   0.038859  -0.699   0.4847    
## X5y_withOrders       0.073765   0.058112   1.269   0.2043    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 730.80  on 605  degrees of freedom
## AIC: 750.8
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop total_withOrders, the term with the largest
# p-value (0.48) in the summary above, and refit.
fit <- update(fit, . ~ . - total_withOrders)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections + 
##     X5y_inspections + total_complaints + X5y_complaints + X5y_withOrders, 
##     family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5331  -0.9615  -0.6241   1.0680   1.9709  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.546460   0.266125  -5.811 6.21e-09 ***
## home_typeMunicipal  -0.546342   0.268538  -2.035   0.0419 *  
## home_typeNon-Profit  0.145870   0.215560   0.677   0.4986    
## number_beds          0.011721   0.001843   6.358 2.04e-10 ***
## total_inspections   -0.023973   0.017653  -1.358   0.1745    
## X5y_inspections     -0.019374   0.032015  -0.605   0.5451    
## total_complaints     0.057345   0.032101   1.786   0.0740 .  
## X5y_complaints       0.047486   0.056712   0.837   0.4024    
## X5y_withOrders       0.040192   0.032489   1.237   0.2160    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 731.29  on 606  degrees of freedom
## AIC: 749.29
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_inspections, the term with the largest
# p-value (0.55) in the summary above, and refit.
fit <- update(fit, . ~ . - X5y_inspections)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections + 
##     total_complaints + X5y_complaints + X5y_withOrders, family = binomial, 
##     data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5263  -0.9616  -0.6166   1.0543   1.9948  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.58272    0.25879  -6.116 9.61e-10 ***
## home_typeMunicipal  -0.54990    0.26865  -2.047  0.04067 *  
## home_typeNon-Profit  0.14367    0.21535   0.667  0.50467    
## number_beds          0.01168    0.00184   6.348 2.17e-10 ***
## total_inspections   -0.03167    0.01227  -2.581  0.00986 ** 
## total_complaints     0.06935    0.02538   2.732  0.00629 ** 
## X5y_complaints       0.01949    0.03270   0.596  0.55112    
## X5y_withOrders       0.03018    0.02789   1.082  0.27932    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 731.66  on 607  degrees of freedom
## AIC: 747.66
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_complaints, the term with the largest
# p-value (0.55) in the summary above, and refit.
fit <- update(fit, . ~ . - X5y_complaints)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections + 
##     total_complaints + X5y_withOrders, family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5648  -0.9623  -0.6222   1.0539   1.9893  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.600855   0.256928  -6.231 4.64e-10 ***
## home_typeMunicipal  -0.541607   0.267981  -2.021 0.043273 *  
## home_typeNon-Profit  0.146247   0.215209   0.680 0.496784    
## number_beds          0.011743   0.001836   6.397 1.58e-10 ***
## total_inspections   -0.030982   0.012173  -2.545 0.010926 *  
## total_complaints     0.076189   0.022745   3.350 0.000809 ***
## X5y_withOrders       0.032378   0.027472   1.179 0.238560    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 732.01  on 608  degrees of freedom
## AIC: 746.01
## 
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_withOrders (p = 0.24 in the summary above),
# the last numeric term above the 0.05 level, and refit.
fit <- update(fit, . ~ . - X5y_withOrders)
summary(fit)
## 
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections + 
##     total_complaints, family = binomial, data = data_selected)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5854  -0.9661  -0.6290   1.0365   1.9760  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -1.60829    0.25716  -6.254 4.00e-10 ***
## home_typeMunicipal  -0.56603    0.26722  -2.118  0.03416 *  
## home_typeNon-Profit  0.16650    0.21424   0.777  0.43707    
## number_beds          0.01174    0.00183   6.415 1.41e-10 ***
## total_inspections   -0.02500    0.01113  -2.246  0.02468 *  
## total_complaints     0.07084    0.02235   3.170  0.00153 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 851.01  on 614  degrees of freedom
## Residual deviance: 733.42  on 609  degrees of freedom
## AIC: 745.42
## 
## Number of Fisher Scoring iterations: 4